From 0d974f3969a6fe233fd9ce0dca6a4f2b59f71bd2 Mon Sep 17 00:00:00 2001
From: ChillerDragon
Date: Tue, 13 Jan 2026 16:48:38 +0100
Subject: [PATCH] Update mimalloc to 3.2.6

```
rm -rf 3rd/mimalloc-2.0.9
wget https://github.com/microsoft/mimalloc/archive/refs/tags/v3.2.6.zip
unzip v3.2.6.zip
rm v3.2.6.zip
mv mimalloc-3.2.6 3rd
sed -i 's/mimalloc-2.0.9/mimalloc-3.2.6/g' CodeFormatServer/CMakeLists.txt
```

Closed #216
---
 3rd/mimalloc-2.0.9/CMakeLists.txt             | 486 ----
 3rd/mimalloc-2.0.9/azure-pipelines.yml        | 181 --
 .../ide/vs2017/mimalloc-override-test.vcxproj | 190 --
 .../ide/vs2017/mimalloc-override.vcxproj      | 256 --
 .../ide/vs2017/mimalloc-test-stress.vcxproj   | 159 --
 .../ide/vs2017/mimalloc-test.vcxproj          | 158 --
 3rd/mimalloc-2.0.9/ide/vs2017/mimalloc.sln    | 71 -
 .../ide/vs2017/mimalloc.vcxproj               | 262 --
 .../ide/vs2019/mimalloc-override-test.vcxproj | 190 --
 .../ide/vs2019/mimalloc-override.vcxproj      | 256 --
 .../ide/vs2019/mimalloc-test-api.vcxproj      | 155 --
 .../ide/vs2019/mimalloc-test-stress.vcxproj   | 159 --
 .../ide/vs2019/mimalloc-test.vcxproj          | 158 --
 3rd/mimalloc-2.0.9/ide/vs2019/mimalloc.sln    | 81 -
 .../ide/vs2019/mimalloc.vcxproj               | 254 --
 .../ide/vs2022/mimalloc-override.vcxproj      | 257 --
 .../ide/vs2022/mimalloc-test-api.vcxproj      | 155 --
 3rd/mimalloc-2.0.9/ide/vs2022/mimalloc.sln    | 81 -
 .../ide/vs2022/mimalloc.vcxproj               | 255 --
 .../include/mimalloc-internal.h               | 1116 --------
 3rd/mimalloc-2.0.9/include/mimalloc-track.h   | 62 -
 3rd/mimalloc-2.0.9/include/mimalloc-types.h   | 609 -----
 3rd/mimalloc-2.0.9/src/alloc-aligned.c        | 306 ---
 3rd/mimalloc-2.0.9/src/alloc.c                | 1031 --------
 3rd/mimalloc-2.0.9/src/arena.c                | 536 ----
 3rd/mimalloc-2.0.9/src/bitmap.c               | 414 ---
 3rd/mimalloc-2.0.9/src/bitmap.h               | 111 -
 3rd/mimalloc-2.0.9/src/heap.c                 | 602 -----
 3rd/mimalloc-2.0.9/src/init.c                 | 716 -----
 3rd/mimalloc-2.0.9/src/options.c              | 642 -----
 3rd/mimalloc-2.0.9/src/os.c                   | 1479 -----------
 3rd/mimalloc-2.0.9/src/page-queue.c           | 332 ---
 3rd/mimalloc-2.0.9/src/page.c                 | 926 -------
 3rd/mimalloc-2.0.9/src/region.c               | 516 ----
 3rd/mimalloc-2.0.9/src/segment-cache.c        | 409 ---
 3rd/mimalloc-2.0.9/src/segment.c              | 1623 ------------
 3rd/mimalloc-2.0.9/src/stats.c                | 618 -----
 3rd/mimalloc-2.0.9/test/main-override.cpp     | 343 ---
 .../.gitattributes                            | 0
 .../.gitignore                                | 6 +-
 3rd/mimalloc-3.2.6/CMakeLists.txt             | 802 ++++++
 .../LICENSE                                   | 2 +-
 3rd/mimalloc-3.2.6/SECURITY.md                | 41 +
 3rd/mimalloc-3.2.6/azure-pipelines.yml        | 262 ++
 3rd/mimalloc-3.2.6/bin/readme.md              | 118 +
 .../cmake/JoinPaths.cmake                     | 0
 .../cmake/mimalloc-config-version.cmake       | 6 +-
 .../cmake/mimalloc-config.cmake               | 0
 .../contrib/docker/alpine-arm32v7/Dockerfile  | 28 +
 .../contrib/docker/alpine-x86/Dockerfile      | 28 +
 .../contrib/docker/alpine/Dockerfile          | 23 +
 .../contrib/docker/manylinux-x64/Dockerfile   | 23 +
 3rd/mimalloc-3.2.6/contrib/docker/readme.md   | 10 +
 .../contrib/vcpkg/portfile.cmake              | 64 +
 3rd/mimalloc-3.2.6/contrib/vcpkg/readme.md    | 40 +
 3rd/mimalloc-3.2.6/contrib/vcpkg/usage        | 20 +
 .../contrib/vcpkg/vcpkg-cmake-wrapper.cmake   | 20 +
 3rd/mimalloc-3.2.6/contrib/vcpkg/vcpkg.json   | 48 +
 .../bench-c5-18xlarge-2020-01-20-a.svg        | 0
 .../bench-c5-18xlarge-2020-01-20-b.svg        | 0
 .../bench-c5-18xlarge-2020-01-20-rss-a.svg    | 0
 .../bench-c5-18xlarge-2020-01-20-rss-b.svg    | 0
 .../doc/bench-2020/bench-r5a-1.svg            | 0
 .../bench-r5a-12xlarge-2020-01-16-a.svg       | 0
 .../bench-r5a-12xlarge-2020-01-16-b.svg       | 0
 .../doc/bench-2020/bench-r5a-2.svg            | 0
 .../doc/bench-2020/bench-r5a-rss-1.svg        | 0
 .../doc/bench-2020/bench-r5a-rss-2.svg        | 0
 .../doc/bench-2020/bench-spec-rss.svg         | 0
 .../doc/bench-2020/bench-spec.svg             | 0
 .../doc/bench-2020/bench-z4-1.svg             | 0
 .../doc/bench-2020/bench-z4-2.svg             | 0
 .../doc/bench-2020/bench-z4-rss-1.svg         | 0
 .../doc/bench-2020/bench-z4-rss-2.svg         | 0
 .../bench-amd5950x-2021-01-30-a.svg           | 0
 .../bench-amd5950x-2021-01-30-b.svg           | 0
 .../bench-c5-18xlarge-2021-01-30-a.svg        | 0
 .../bench-c5-18xlarge-2021-01-30-b.svg        | 0
 .../bench-c5-18xlarge-2021-01-30-rss-a.svg    | 0
 .../bench-c5-18xlarge-2021-01-30-rss-b.svg    | 0
 .../bench-2021/bench-macmini-2021-01-30.svg   | 0
 .../doc/doxyfile                              | 700 +++--
 .../doc/ds-logo.jpg                           | Bin
 .../doc/ds-logo.png                           | Bin
 .../doc/mimalloc-doc.h                        | 434 +++-
 .../doc/mimalloc-doxygen.css                  | 11 +
 .../doc/mimalloc-logo-100.png                 | Bin
 .../doc/mimalloc-logo.png                     | Bin
 .../doc/mimalloc-logo.svg                     | 0
 .../doc/spades-logo.png                       | Bin
 .../doc/unreal-logo.svg                       | 0
 .../ide/vs2022/mimalloc-lib.vcxproj           | 502 ++++
 .../ide/vs2022/mimalloc-lib.vcxproj.filters   | 114 +
 .../ide/vs2022/mimalloc-override-dll.vcxproj  | 517 ++++
 .../mimalloc-override-dll.vcxproj.filters     | 114 +
 .../vs2022/mimalloc-override-test-dep.vcxproj | 355 +++
 .../ide/vs2022/mimalloc-override-test.vcxproj | 178 +-
 .../ide/vs2022/mimalloc-test-api.vcxproj      | 295 +++
 .../ide/vs2022/mimalloc-test-stress.vcxproj   | 135 +-
 .../ide/vs2022/mimalloc-test.vcxproj          | 141 +-
 3rd/mimalloc-3.2.6/ide/vs2022/mimalloc.sln    | 151 ++
 .../include/mimalloc-new-delete.h             | 0
 .../include/mimalloc-override.h               | 3 +-
 3rd/mimalloc-3.2.6/include/mimalloc-stats.h   | 149 ++
 .../include/mimalloc.h                        | 382 ++-
 .../include/mimalloc/atomic.h}                | 300 ++-
 3rd/mimalloc-3.2.6/include/mimalloc/bits.h    | 342 +++
 .../include/mimalloc/internal.h               | 1287 +++++++++
 3rd/mimalloc-3.2.6/include/mimalloc/prim.h    | 501 ++++
 3rd/mimalloc-3.2.6/include/mimalloc/track.h   | 145 ++
 3rd/mimalloc-3.2.6/include/mimalloc/types.h   | 705 +++++
 .../mimalloc.pc.in                            | 8 +-
 .../readme.md                                 | 456 +++-
 3rd/mimalloc-3.2.6/src/alloc-aligned.c        | 439 ++++
 .../src/alloc-override.c                      | 49 +-
 .../src/alloc-posix.c                         | 7 +-
 3rd/mimalloc-3.2.6/src/alloc.c                | 846 ++++++
 3rd/mimalloc-3.2.6/src/arena-meta.c           | 179 ++
 3rd/mimalloc-3.2.6/src/arena.c                | 2311 +++++++++++++++++
 3rd/mimalloc-3.2.6/src/bitmap.c               | 1882 ++++++++++++++
 3rd/mimalloc-3.2.6/src/bitmap.h               | 339 +++
 3rd/mimalloc-3.2.6/src/free.c                 | 617 +++++
 3rd/mimalloc-3.2.6/src/heap.c                 | 214 ++
 3rd/mimalloc-3.2.6/src/init.c                 | 957 +++++++
 3rd/mimalloc-3.2.6/src/libc.c                 | 432 +++
 3rd/mimalloc-3.2.6/src/options.c              | 694 +++++
 3rd/mimalloc-3.2.6/src/os.c                   | 858 ++++++
 3rd/mimalloc-3.2.6/src/page-map.c             | 429 +++
 3rd/mimalloc-3.2.6/src/page-queue.c           | 455 ++++
 3rd/mimalloc-3.2.6/src/page.c                 | 1020 ++++++++
 3rd/mimalloc-3.2.6/src/prim/emscripten/prim.c | 252 ++
 .../src/prim/osx/alloc-override-zone.c}       | 20 +-
 3rd/mimalloc-3.2.6/src/prim/osx/prim.c        | 9 +
 3rd/mimalloc-3.2.6/src/prim/prim.c            | 76 +
 3rd/mimalloc-3.2.6/src/prim/readme.md         | 9 +
 3rd/mimalloc-3.2.6/src/prim/unix/prim.c       | 991 +++++++
 3rd/mimalloc-3.2.6/src/prim/wasi/prim.c       | 288 ++
 .../src/prim/windows/etw-mimalloc.wprp        | 61 +
 3rd/mimalloc-3.2.6/src/prim/windows/etw.h     | 905 +++++++
 3rd/mimalloc-3.2.6/src/prim/windows/etw.man   | Bin 0 -> 3926 bytes
 3rd/mimalloc-3.2.6/src/prim/windows/prim.c    | 921 +++++++
 3rd/mimalloc-3.2.6/src/prim/windows/readme.md | 17 +
 .../src/random.c                              | 199 +-
 .../src/static.c                              | 32 +-
 3rd/mimalloc-3.2.6/src/stats.c                | 790 ++++++
 3rd/mimalloc-3.2.6/src/theap.c                | 674 +++++
 3rd/mimalloc-3.2.6/src/threadlocal.c          | 174 ++
 .../test/CMakeLists.txt                       | 12 +-
 3rd/mimalloc-3.2.6/test/main-override-dep.cpp | 60 +
 3rd/mimalloc-3.2.6/test/main-override-dep.h   | 12 +
.../test/main-override-static.c | 240 +- .../test/main-override.c | 0 3rd/mimalloc-3.2.6/test/main-override.cpp | 542 ++++ .../test/main.c | 14 +- .../test/readme.md | 0 .../test/test-api-fill.c | 11 +- .../test/test-api.c | 264 +- .../test/test-stress.c | 240 +- .../test/test-wrong.c | 26 +- .../test/testhelper.h | 0 CodeFormatServer/CMakeLists.txt | 4 +- 161 files changed, 26993 insertions(+), 17199 deletions(-) delete mode 100644 3rd/mimalloc-2.0.9/CMakeLists.txt delete mode 100644 3rd/mimalloc-2.0.9/azure-pipelines.yml delete mode 100644 3rd/mimalloc-2.0.9/ide/vs2017/mimalloc-override-test.vcxproj delete mode 100644 3rd/mimalloc-2.0.9/ide/vs2017/mimalloc-override.vcxproj delete mode 100644 3rd/mimalloc-2.0.9/ide/vs2017/mimalloc-test-stress.vcxproj delete mode 100644 3rd/mimalloc-2.0.9/ide/vs2017/mimalloc-test.vcxproj delete mode 100644 3rd/mimalloc-2.0.9/ide/vs2017/mimalloc.sln delete mode 100644 3rd/mimalloc-2.0.9/ide/vs2017/mimalloc.vcxproj delete mode 100644 3rd/mimalloc-2.0.9/ide/vs2019/mimalloc-override-test.vcxproj delete mode 100644 3rd/mimalloc-2.0.9/ide/vs2019/mimalloc-override.vcxproj delete mode 100644 3rd/mimalloc-2.0.9/ide/vs2019/mimalloc-test-api.vcxproj delete mode 100644 3rd/mimalloc-2.0.9/ide/vs2019/mimalloc-test-stress.vcxproj delete mode 100644 3rd/mimalloc-2.0.9/ide/vs2019/mimalloc-test.vcxproj delete mode 100644 3rd/mimalloc-2.0.9/ide/vs2019/mimalloc.sln delete mode 100644 3rd/mimalloc-2.0.9/ide/vs2019/mimalloc.vcxproj delete mode 100644 3rd/mimalloc-2.0.9/ide/vs2022/mimalloc-override.vcxproj delete mode 100644 3rd/mimalloc-2.0.9/ide/vs2022/mimalloc-test-api.vcxproj delete mode 100644 3rd/mimalloc-2.0.9/ide/vs2022/mimalloc.sln delete mode 100644 3rd/mimalloc-2.0.9/ide/vs2022/mimalloc.vcxproj delete mode 100644 3rd/mimalloc-2.0.9/include/mimalloc-internal.h delete mode 100644 3rd/mimalloc-2.0.9/include/mimalloc-track.h delete mode 100644 3rd/mimalloc-2.0.9/include/mimalloc-types.h delete mode 100644 3rd/mimalloc-2.0.9/src/alloc-aligned.c delete mode 100644 3rd/mimalloc-2.0.9/src/alloc.c delete mode 100644 3rd/mimalloc-2.0.9/src/arena.c delete mode 100644 3rd/mimalloc-2.0.9/src/bitmap.c delete mode 100644 3rd/mimalloc-2.0.9/src/bitmap.h delete mode 100644 3rd/mimalloc-2.0.9/src/heap.c delete mode 100644 3rd/mimalloc-2.0.9/src/init.c delete mode 100644 3rd/mimalloc-2.0.9/src/options.c delete mode 100644 3rd/mimalloc-2.0.9/src/os.c delete mode 100644 3rd/mimalloc-2.0.9/src/page-queue.c delete mode 100644 3rd/mimalloc-2.0.9/src/page.c delete mode 100644 3rd/mimalloc-2.0.9/src/region.c delete mode 100644 3rd/mimalloc-2.0.9/src/segment-cache.c delete mode 100644 3rd/mimalloc-2.0.9/src/segment.c delete mode 100644 3rd/mimalloc-2.0.9/src/stats.c delete mode 100644 3rd/mimalloc-2.0.9/test/main-override.cpp rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/.gitattributes (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/.gitignore (74%) create mode 100644 3rd/mimalloc-3.2.6/CMakeLists.txt rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/LICENSE (94%) create mode 100644 3rd/mimalloc-3.2.6/SECURITY.md create mode 100644 3rd/mimalloc-3.2.6/azure-pipelines.yml create mode 100644 3rd/mimalloc-3.2.6/bin/readme.md rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/cmake/JoinPaths.cmake (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/cmake/mimalloc-config-version.cmake (89%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/cmake/mimalloc-config.cmake (100%) create mode 100644 3rd/mimalloc-3.2.6/contrib/docker/alpine-arm32v7/Dockerfile create mode 100644 
3rd/mimalloc-3.2.6/contrib/docker/alpine-x86/Dockerfile create mode 100644 3rd/mimalloc-3.2.6/contrib/docker/alpine/Dockerfile create mode 100644 3rd/mimalloc-3.2.6/contrib/docker/manylinux-x64/Dockerfile create mode 100644 3rd/mimalloc-3.2.6/contrib/docker/readme.md create mode 100644 3rd/mimalloc-3.2.6/contrib/vcpkg/portfile.cmake create mode 100644 3rd/mimalloc-3.2.6/contrib/vcpkg/readme.md create mode 100644 3rd/mimalloc-3.2.6/contrib/vcpkg/usage create mode 100644 3rd/mimalloc-3.2.6/contrib/vcpkg/vcpkg-cmake-wrapper.cmake create mode 100644 3rd/mimalloc-3.2.6/contrib/vcpkg/vcpkg.json rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2020/bench-c5-18xlarge-2020-01-20-a.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2020/bench-c5-18xlarge-2020-01-20-b.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2020/bench-c5-18xlarge-2020-01-20-rss-a.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2020/bench-c5-18xlarge-2020-01-20-rss-b.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2020/bench-r5a-1.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2020/bench-r5a-12xlarge-2020-01-16-a.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2020/bench-r5a-12xlarge-2020-01-16-b.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2020/bench-r5a-2.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2020/bench-r5a-rss-1.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2020/bench-r5a-rss-2.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2020/bench-spec-rss.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2020/bench-spec.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2020/bench-z4-1.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2020/bench-z4-2.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2020/bench-z4-rss-1.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2020/bench-z4-rss-2.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2021/bench-amd5950x-2021-01-30-a.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2021/bench-amd5950x-2021-01-30-b.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2021/bench-c5-18xlarge-2021-01-30-a.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2021/bench-c5-18xlarge-2021-01-30-b.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2021/bench-c5-18xlarge-2021-01-30-rss-a.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2021/bench-c5-18xlarge-2021-01-30-rss-b.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/bench-2021/bench-macmini-2021-01-30.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/doxyfile (80%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/ds-logo.jpg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/ds-logo.png (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/mimalloc-doc.h (70%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/mimalloc-doxygen.css (83%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/mimalloc-logo-100.png (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/mimalloc-logo.png (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/mimalloc-logo.svg (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/spades-logo.png (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/doc/unreal-logo.svg (100%) create mode 100644 
3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-lib.vcxproj create mode 100644 3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-lib.vcxproj.filters create mode 100644 3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-override-dll.vcxproj create mode 100644 3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-override-dll.vcxproj.filters create mode 100644 3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-override-test-dep.vcxproj rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/ide/vs2022/mimalloc-override-test.vcxproj (51%) create mode 100644 3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-test-api.vcxproj rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/ide/vs2022/mimalloc-test-stress.vcxproj (52%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/ide/vs2022/mimalloc-test.vcxproj (52%) create mode 100644 3rd/mimalloc-3.2.6/ide/vs2022/mimalloc.sln rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/include/mimalloc-new-delete.h (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/include/mimalloc-override.h (96%) create mode 100644 3rd/mimalloc-3.2.6/include/mimalloc-stats.h rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/include/mimalloc.h (61%) rename 3rd/{mimalloc-2.0.9/include/mimalloc-atomic.h => mimalloc-3.2.6/include/mimalloc/atomic.h} (58%) create mode 100644 3rd/mimalloc-3.2.6/include/mimalloc/bits.h create mode 100644 3rd/mimalloc-3.2.6/include/mimalloc/internal.h create mode 100644 3rd/mimalloc-3.2.6/include/mimalloc/prim.h create mode 100644 3rd/mimalloc-3.2.6/include/mimalloc/track.h create mode 100644 3rd/mimalloc-3.2.6/include/mimalloc/types.h rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/mimalloc.pc.in (64%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/readme.md (65%) create mode 100644 3rd/mimalloc-3.2.6/src/alloc-aligned.c rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/src/alloc-override.c (86%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/src/alloc-posix.c (96%) create mode 100644 3rd/mimalloc-3.2.6/src/alloc.c create mode 100644 3rd/mimalloc-3.2.6/src/arena-meta.c create mode 100644 3rd/mimalloc-3.2.6/src/arena.c create mode 100644 3rd/mimalloc-3.2.6/src/bitmap.c create mode 100644 3rd/mimalloc-3.2.6/src/bitmap.h create mode 100644 3rd/mimalloc-3.2.6/src/free.c create mode 100644 3rd/mimalloc-3.2.6/src/heap.c create mode 100644 3rd/mimalloc-3.2.6/src/init.c create mode 100644 3rd/mimalloc-3.2.6/src/libc.c create mode 100644 3rd/mimalloc-3.2.6/src/options.c create mode 100644 3rd/mimalloc-3.2.6/src/os.c create mode 100644 3rd/mimalloc-3.2.6/src/page-map.c create mode 100644 3rd/mimalloc-3.2.6/src/page-queue.c create mode 100644 3rd/mimalloc-3.2.6/src/page.c create mode 100644 3rd/mimalloc-3.2.6/src/prim/emscripten/prim.c rename 3rd/{mimalloc-2.0.9/src/alloc-override-osx.c => mimalloc-3.2.6/src/prim/osx/alloc-override-zone.c} (95%) create mode 100644 3rd/mimalloc-3.2.6/src/prim/osx/prim.c create mode 100644 3rd/mimalloc-3.2.6/src/prim/prim.c create mode 100644 3rd/mimalloc-3.2.6/src/prim/readme.md create mode 100644 3rd/mimalloc-3.2.6/src/prim/unix/prim.c create mode 100644 3rd/mimalloc-3.2.6/src/prim/wasi/prim.c create mode 100644 3rd/mimalloc-3.2.6/src/prim/windows/etw-mimalloc.wprp create mode 100644 3rd/mimalloc-3.2.6/src/prim/windows/etw.h create mode 100644 3rd/mimalloc-3.2.6/src/prim/windows/etw.man create mode 100644 3rd/mimalloc-3.2.6/src/prim/windows/prim.c create mode 100644 3rd/mimalloc-3.2.6/src/prim/windows/readme.md rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/src/random.c (58%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/src/static.c (75%) create mode 100644 3rd/mimalloc-3.2.6/src/stats.c create mode 100644 
3rd/mimalloc-3.2.6/src/theap.c create mode 100644 3rd/mimalloc-3.2.6/src/threadlocal.c rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/test/CMakeLists.txt (86%) create mode 100644 3rd/mimalloc-3.2.6/test/main-override-dep.cpp create mode 100644 3rd/mimalloc-3.2.6/test/main-override-dep.h rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/test/main-override-static.c (64%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/test/main-override.c (100%) create mode 100644 3rd/mimalloc-3.2.6/test/main-override.cpp rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/test/main.c (71%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/test/readme.md (100%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/test/test-api-fill.c (98%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/test/test-api.c (54%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/test/test-stress.c (58%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/test/test-wrong.c (74%) rename 3rd/{mimalloc-2.0.9 => mimalloc-3.2.6}/test/testhelper.h (100%) diff --git a/3rd/mimalloc-2.0.9/CMakeLists.txt b/3rd/mimalloc-2.0.9/CMakeLists.txt deleted file mode 100644 index 74c1f291..00000000 --- a/3rd/mimalloc-2.0.9/CMakeLists.txt +++ /dev/null @@ -1,486 +0,0 @@ -cmake_minimum_required(VERSION 3.13) -project(libmimalloc C CXX) - -set(CMAKE_C_STANDARD 11) -set(CMAKE_CXX_STANDARD 17) - -option(MI_SECURE "Use full security mitigations (like guard pages, allocation randomization, double-free mitigation, and free-list corruption detection)" OFF) -option(MI_DEBUG_FULL "Use full internal heap invariant checking in DEBUG mode (expensive)" OFF) -option(MI_PADDING "Enable padding to detect heap block overflow (used only in DEBUG mode or with Valgrind)" ON) -option(MI_OVERRIDE "Override the standard malloc interface (e.g. define entry points for malloc() etc)" ON) -option(MI_XMALLOC "Enable abort() call on memory allocation failure by default" OFF) -option(MI_SHOW_ERRORS "Show error and warning messages by default (only enabled by default in DEBUG mode)" OFF) -option(MI_VALGRIND "Compile with Valgrind support (adds a small overhead)" OFF) -option(MI_ASAN "Compile with address sanitizer support (adds a small overhead)" OFF) -option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF) -option(MI_SEE_ASM "Generate assembly files" OFF) -option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON) -option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON) -option(MI_WIN_REDIRECT "Use redirection module ('mimalloc-redirect') on Windows if compiling mimalloc as a DLL" ON) -option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF) -option(MI_BUILD_SHARED "Build shared library" ON) -option(MI_BUILD_STATIC "Build static library" ON) -option(MI_BUILD_OBJECT "Build object library" ON) -option(MI_BUILD_TESTS "Build test executables" ON) -option(MI_DEBUG_TSAN "Build with thread sanitizer (needs clang)" OFF) -option(MI_DEBUG_UBSAN "Build with undefined-behavior sanitizer (needs clang++)" OFF) -option(MI_SKIP_COLLECT_ON_EXIT, "Skip collecting memory on program exit" OFF) - -# deprecated options -option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF) -option(MI_INSTALL_TOPLEVEL "Install directly into $CMAKE_INSTALL_PREFIX instead of PREFIX/lib/mimalloc-version (deprecated)" OFF) -option(MI_USE_LIBATOMIC "Explicitly link with -latomic (on older systems) (deprecated and detected automatically)" OFF) - -include(CheckIncludeFiles) 
-include(GNUInstallDirs) -include("cmake/mimalloc-config-version.cmake") - -set(mi_sources - src/stats.c - src/random.c - src/os.c - src/bitmap.c - src/arena.c - src/segment-cache.c - src/segment.c - src/page.c - src/alloc.c - src/alloc-aligned.c - src/alloc-posix.c - src/heap.c - src/options.c - src/init.c) - - -# ----------------------------------------------------------------------------- -# Convenience: set default build type depending on the build directory -# ----------------------------------------------------------------------------- - -message(STATUS "") -if (NOT CMAKE_BUILD_TYPE) - if ("${CMAKE_BINARY_DIR}" MATCHES ".*(D|d)ebug$" OR MI_DEBUG_FULL) - message(STATUS "No build type selected, default to: Debug") - set(CMAKE_BUILD_TYPE "Debug") - else() - message(STATUS "No build type selected, default to: Release") - set(CMAKE_BUILD_TYPE "Release") - endif() -endif() - -if("${CMAKE_BINARY_DIR}" MATCHES ".*(S|s)ecure$") - message(STATUS "Default to secure build") - set(MI_SECURE "ON") -endif() - - -# ----------------------------------------------------------------------------- -# Process options -# ----------------------------------------------------------------------------- - -if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel") - set(MI_USE_CXX "ON") -endif() - -if(MI_OVERRIDE) - message(STATUS "Override standard malloc (MI_OVERRIDE=ON)") - if(APPLE) - if(MI_OSX_ZONE) - # use zone's on macOS - message(STATUS " Use malloc zone to override malloc (MI_OSX_ZONE=ON)") - list(APPEND mi_sources src/alloc-override-osx.c) - list(APPEND mi_defines MI_OSX_ZONE=1) - if (NOT MI_OSX_INTERPOSE) - message(STATUS " WARNING: zone overriding usually also needs interpose (use -DMI_OSX_INTERPOSE=ON)") - endif() - endif() - if(MI_OSX_INTERPOSE) - # use interpose on macOS - message(STATUS " Use interpose to override malloc (MI_OSX_INTERPOSE=ON)") - list(APPEND mi_defines MI_OSX_INTERPOSE=1) - if (NOT MI_OSX_ZONE) - message(STATUS " WARNING: interpose usually also needs zone overriding (use -DMI_OSX_INTERPOSE=ON)") - endif() - endif() - if(MI_USE_CXX AND MI_OSX_INTERPOSE) - message(STATUS " WARNING: if dynamically overriding malloc/free, it is more reliable to build mimalloc as C code (use -DMI_USE_CXX=OFF)") - endif() - endif() -endif() - -if(WIN32) - if (MI_WIN_REDIRECT) - if (MSVC_C_ARCHITECTURE_ID MATCHES "ARM") - message(STATUS "Cannot use redirection on Windows ARM (MI_WIN_REDIRECT=OFF)") - set(MI_WIN_REDIRECT OFF) - endif() - endif() - if (NOT MI_WIN_REDIRECT) - # use a negative define for backward compatibility - list(APPEND mi_defines MI_WIN_NOREDIRECT=1) - endif() -endif() - -if(MI_SECURE) - message(STATUS "Set full secure build (MI_SECURE=ON)") - list(APPEND mi_defines MI_SECURE=4) - #if (MI_VALGRIND) - # message(WARNING "Secure mode is a bit weakened when compiling with Valgrind support as buffer overflow detection is no longer byte-precise (if running without valgrind)") - #endif() -endif() - -if(MI_VALGRIND) - CHECK_INCLUDE_FILES("valgrind/valgrind.h;valgrind/memcheck.h" MI_HAS_VALGRINDH) - if (NOT MI_HAS_VALGRINDH) - set(MI_VALGRIND OFF) - message(WARNING "Cannot find the 'valgrind/valgrind.h' and 'valgrind/memcheck.h' -- install valgrind first") - message(STATUS "Compile **without** Valgrind support (MI_VALGRIND=OFF)") - else() - message(STATUS "Compile with Valgrind support (MI_VALGRIND=ON)") - list(APPEND mi_defines MI_VALGRIND=1) - endif() -endif() - -if(MI_ASAN) - if (MI_VALGRIND) - set(MI_ASAN OFF) - message(WARNING "Cannot enable address sanitizer support with also Valgrind support enabled 
(MI_ASAN=OFF)") - else() - CHECK_INCLUDE_FILES("sanitizer/asan_interface.h" MI_HAS_ASANH) - if (NOT MI_HAS_ASANH) - set(MI_ASAN OFF) - message(WARNING "Cannot find the 'sanitizer/asan_interface.h' -- install address sanitizer support first") - message(STATUS "Compile **without** address sanitizer support (MI_ASAN=OFF)") - else() - message(STATUS "Compile with address sanitizer support (MI_ASAN=ON)") - list(APPEND mi_defines MI_ASAN=1) - list(APPEND mi_cflags -fsanitize=address) - list(APPEND CMAKE_EXE_LINKER_FLAGS -fsanitize=address) - endif() - endif() -endif() - -if(MI_SEE_ASM) - message(STATUS "Generate assembly listings (MI_SEE_ASM=ON)") - list(APPEND mi_cflags -save-temps) -endif() - -if(MI_CHECK_FULL) - message(STATUS "The MI_CHECK_FULL option is deprecated, use MI_DEBUG_FULL instead") - set(MI_DEBUG_FULL "ON") -endif() - -if (MI_SKIP_COLLECT_ON_EXIT) - message(STATUS "Skip collecting memory on program exit (MI_SKIP_COLLECT_ON_EXIT=ON)") - list(APPEND mi_defines MI_SKIP_COLLECT_ON_EXIT=1) -endif() - -if(MI_DEBUG_FULL) - message(STATUS "Set debug level to full internal invariant checking (MI_DEBUG_FULL=ON)") - list(APPEND mi_defines MI_DEBUG=3) # full invariant checking -endif() - -if(NOT MI_PADDING) - message(STATUS "Disable padding of heap blocks in debug mode (MI_PADDING=OFF)") - list(APPEND mi_defines MI_PADDING=0) -endif() - -if(MI_XMALLOC) - message(STATUS "Enable abort() calls on memory allocation failure (MI_XMALLOC=ON)") - list(APPEND mi_defines MI_XMALLOC=1) -endif() - -if(MI_SHOW_ERRORS) - message(STATUS "Enable printing of error and warning messages by default (MI_SHOW_ERRORS=ON)") - list(APPEND mi_defines MI_SHOW_ERRORS=1) -endif() - -if(MI_DEBUG_TSAN) - if(CMAKE_C_COMPILER_ID MATCHES "Clang") - message(STATUS "Build with thread sanitizer (MI_DEBUG_TSAN=ON)") - list(APPEND mi_defines MI_TSAN=1) - list(APPEND mi_cflags -fsanitize=thread -g -O1) - list(APPEND CMAKE_EXE_LINKER_FLAGS -fsanitize=thread) - else() - message(WARNING "Can only use thread sanitizer with clang (MI_DEBUG_TSAN=ON but ignored)") - endif() -endif() - -if(MI_DEBUG_UBSAN) - if(CMAKE_BUILD_TYPE MATCHES "Debug") - if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - message(STATUS "Build with undefined-behavior sanitizer (MI_DEBUG_UBSAN=ON)") - list(APPEND mi_cflags -fsanitize=undefined -g -fno-sanitize-recover=undefined) - list(APPEND CMAKE_EXE_LINKER_FLAGS -fsanitize=undefined) - if (NOT MI_USE_CXX) - message(STATUS "(switch to use C++ due to MI_DEBUG_UBSAN)") - set(MI_USE_CXX "ON") - endif() - else() - message(WARNING "Can only use undefined-behavior sanitizer with clang++ (MI_DEBUG_UBSAN=ON but ignored)") - endif() - else() - message(WARNING "Can only use thread sanitizer with a debug build (CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE})") - endif() -endif() - -if(MI_USE_CXX) - message(STATUS "Use the C++ compiler to compile (MI_USE_CXX=ON)") - set_source_files_properties(${mi_sources} PROPERTIES LANGUAGE CXX ) - set_source_files_properties(src/static.c test/test-api.c test/test-api-fill test/test-stress PROPERTIES LANGUAGE CXX ) - if(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang|Clang") - list(APPEND mi_cflags -Wno-deprecated) - endif() - if(CMAKE_CXX_COMPILER_ID MATCHES "Intel" AND NOT CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM") - list(APPEND mi_cflags -Kc++) - endif() -endif() - -# Compiler flags -if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") - list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden) - if(NOT MI_USE_CXX) - list(APPEND mi_cflags -Wstrict-prototypes) - endif() - 
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang") - list(APPEND mi_cflags -Wpedantic -Wno-static-in-inline) - endif() -endif() - -if(CMAKE_C_COMPILER_ID MATCHES "Intel") - list(APPEND mi_cflags -Wall -fvisibility=hidden) -endif() - -if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM_NAME MATCHES "Haiku") - if(MI_LOCAL_DYNAMIC_TLS) - list(APPEND mi_cflags -ftls-model=local-dynamic) - else() - list(APPEND mi_cflags -ftls-model=initial-exec) - endif() - if(MI_OVERRIDE) - list(APPEND mi_cflags -fno-builtin-malloc) - endif() -endif() - -if (MSVC AND MSVC_VERSION GREATER_EQUAL 1914) - list(APPEND mi_cflags /Zc:__cplusplus) -endif() - -# extra needed libraries -if(WIN32) - list(APPEND mi_libraries psapi shell32 user32 advapi32 bcrypt) - set(pc_libraries "-lpsapi -lshell32 -luser32 -ladvapi32 -lbcrypt") -else() - set(pc_libraries "") - find_library(MI_LIBPTHREAD pthread) - if (MI_LIBPTHREAD) - list(APPEND mi_libraries ${MI_LIBPTHREAD}) - set(pc_libraries "${pc_libraries} -pthread") - endif() - find_library(MI_LIBRT rt) - if(MI_LIBRT) - list(APPEND mi_libraries ${MI_LIBRT}) - set(pc_libraries "${pc_libraries} -lrt") - endif() - find_library(MI_LIBATOMIC atomic) - if (NOT MI_LIBATOMIC AND MI_USE_LIBATOMIC) - set(MI_LIBATOMIC atomic) - endif() - if (MI_LIBATOMIC) - list(APPEND mi_libraries ${MI_LIBATOMIC}) - set(pc_libraries "${pc_libraries} -latomic") - endif() -endif() - -# ----------------------------------------------------------------------------- -# Install and output names -# ----------------------------------------------------------------------------- - -# dynamic/shared library and symlinks always go to /usr/local/lib equivalent -set(mi_install_libdir "${CMAKE_INSTALL_LIBDIR}") - -# static libraries and object files, includes, and cmake config files -# are either installed at top level, or use versioned directories for side-by-side installation (default) -if (MI_INSTALL_TOPLEVEL) - set(mi_install_objdir "${CMAKE_INSTALL_LIBDIR}") - set(mi_install_incdir "${CMAKE_INSTALL_INCLUDEDIR}") - set(mi_install_cmakedir "${CMAKE_INSTALL_LIBDIR}/cmake/mimalloc") -else() - set(mi_install_objdir "${CMAKE_INSTALL_LIBDIR}/mimalloc-${mi_version}") # for static library and object files - set(mi_install_incdir "${CMAKE_INSTALL_INCLUDEDIR}/mimalloc-${mi_version}") # for includes - set(mi_install_cmakedir "${CMAKE_INSTALL_LIBDIR}/cmake/mimalloc-${mi_version}") # for cmake package info -endif() - -set(mi_basename "mimalloc") -if(MI_SECURE) - set(mi_basename "${mi_basename}-secure") -endif() -if(MI_VALGRIND) - set(mi_basename "${mi_basename}-valgrind") -endif() -if(MI_ASAN) - set(mi_basename "${mi_basename}-asan") -endif() -string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LC) -if(NOT(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel|none)$")) - set(mi_basename "${mi_basename}-${CMAKE_BUILD_TYPE_LC}") #append build type (e.g. 
-debug) if not a release version -endif() - -if(MI_BUILD_SHARED) - list(APPEND mi_build_targets "shared") -endif() -if(MI_BUILD_STATIC) - list(APPEND mi_build_targets "static") -endif() -if(MI_BUILD_OBJECT) - list(APPEND mi_build_targets "object") -endif() -if(MI_BUILD_TESTS) - list(APPEND mi_build_targets "tests") -endif() - -message(STATUS "") -message(STATUS "Library base name: ${mi_basename}") -message(STATUS "Version : ${mi_version}") -message(STATUS "Build type : ${CMAKE_BUILD_TYPE_LC}") -if(MI_USE_CXX) - message(STATUS "C++ Compiler : ${CMAKE_CXX_COMPILER}") -else() - message(STATUS "C Compiler : ${CMAKE_C_COMPILER}") -endif() -message(STATUS "Compiler flags : ${mi_cflags}") -message(STATUS "Compiler defines : ${mi_defines}") -message(STATUS "Link libraries : ${mi_libraries}") -message(STATUS "Build targets : ${mi_build_targets}") -message(STATUS "") - -# ----------------------------------------------------------------------------- -# Main targets -# ----------------------------------------------------------------------------- - -# shared library -if(MI_BUILD_SHARED) - add_library(mimalloc SHARED ${mi_sources}) - set_target_properties(mimalloc PROPERTIES VERSION ${mi_version} SOVERSION ${mi_version_major} OUTPUT_NAME ${mi_basename} ) - target_compile_definitions(mimalloc PRIVATE ${mi_defines} MI_SHARED_LIB MI_SHARED_LIB_EXPORT) - target_compile_options(mimalloc PRIVATE ${mi_cflags}) - target_link_libraries(mimalloc PRIVATE ${mi_libraries}) - target_include_directories(mimalloc PUBLIC - $ - $ - ) - if(WIN32 AND MI_WIN_REDIRECT) - # On windows, link and copy the mimalloc redirection dll too. - if(CMAKE_SIZEOF_VOID_P EQUAL 4) - set(MIMALLOC_REDIRECT_SUFFIX "32") - else() - set(MIMALLOC_REDIRECT_SUFFIX "") - endif() - - target_link_libraries(mimalloc PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/bin/mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.lib) - add_custom_command(TARGET mimalloc POST_BUILD - COMMAND "${CMAKE_COMMAND}" -E copy "${CMAKE_CURRENT_SOURCE_DIR}/bin/mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.dll" $ - COMMENT "Copy mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.dll to output directory") - install(FILES "$/mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.dll" DESTINATION ${mi_install_libdir}) - endif() - - install(TARGETS mimalloc EXPORT mimalloc DESTINATION ${mi_install_libdir} LIBRARY) - install(EXPORT mimalloc DESTINATION ${mi_install_cmakedir}) -endif() - -# static library -if (MI_BUILD_STATIC) - add_library(mimalloc-static STATIC ${mi_sources}) - set_property(TARGET mimalloc-static PROPERTY POSITION_INDEPENDENT_CODE ON) - target_compile_definitions(mimalloc-static PRIVATE ${mi_defines} MI_STATIC_LIB) - target_compile_options(mimalloc-static PRIVATE ${mi_cflags}) - target_link_libraries(mimalloc-static PRIVATE ${mi_libraries}) - target_include_directories(mimalloc-static PUBLIC - $ - $ - ) - if(WIN32) - # When building both static and shared libraries on Windows, a static library should use a - # different output name to avoid the conflict with the import library of a shared one. 
- string(REPLACE "mimalloc" "mimalloc-static" mi_output_name ${mi_basename}) - set_target_properties(mimalloc-static PROPERTIES OUTPUT_NAME ${mi_output_name}) - else() - set_target_properties(mimalloc-static PROPERTIES OUTPUT_NAME ${mi_basename}) - endif() - - install(TARGETS mimalloc-static EXPORT mimalloc DESTINATION ${mi_install_objdir} LIBRARY) - install(EXPORT mimalloc DESTINATION ${mi_install_cmakedir}) -endif() - -# install include files -install(FILES include/mimalloc.h DESTINATION ${mi_install_incdir}) -install(FILES include/mimalloc-override.h DESTINATION ${mi_install_incdir}) -install(FILES include/mimalloc-new-delete.h DESTINATION ${mi_install_incdir}) -install(FILES cmake/mimalloc-config.cmake DESTINATION ${mi_install_cmakedir}) -install(FILES cmake/mimalloc-config-version.cmake DESTINATION ${mi_install_cmakedir}) - - -# single object file for more predictable static overriding -if (MI_BUILD_OBJECT) - add_library(mimalloc-obj OBJECT src/static.c) - set_property(TARGET mimalloc-obj PROPERTY POSITION_INDEPENDENT_CODE ON) - target_compile_definitions(mimalloc-obj PRIVATE ${mi_defines}) - target_compile_options(mimalloc-obj PRIVATE ${mi_cflags}) - target_include_directories(mimalloc-obj PUBLIC - $ - $ - ) - - # the following seems to lead to cmake warnings/errors on some systems, disable for now :-( - # install(TARGETS mimalloc-obj EXPORT mimalloc DESTINATION ${mi_install_objdir}) - - # the FILES expression can also be: $ - # but that fails cmake versions less than 3.10 so we leave it as is for now - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/mimalloc-obj.dir/src/static.c${CMAKE_C_OUTPUT_EXTENSION} - DESTINATION ${mi_install_objdir} - RENAME ${mi_basename}${CMAKE_C_OUTPUT_EXTENSION} ) -endif() - -# pkg-config file support -include("cmake/JoinPaths.cmake") -join_paths(includedir_for_pc_file "\${prefix}" "${CMAKE_INSTALL_INCLUDEDIR}") -join_paths(libdir_for_pc_file "\${prefix}" "${CMAKE_INSTALL_LIBDIR}") - -configure_file(mimalloc.pc.in mimalloc.pc @ONLY) -install(FILES "${CMAKE_CURRENT_BINARY_DIR}/mimalloc.pc" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/") - -# ----------------------------------------------------------------------------- -# API surface testing -# ----------------------------------------------------------------------------- - -if (MI_BUILD_TESTS) - enable_testing() - - foreach(TEST_NAME api api-fill stress) - add_executable(mimalloc-test-${TEST_NAME} test/test-${TEST_NAME}.c) - target_compile_definitions(mimalloc-test-${TEST_NAME} PRIVATE ${mi_defines}) - target_compile_options(mimalloc-test-${TEST_NAME} PRIVATE ${mi_cflags}) - target_include_directories(mimalloc-test-${TEST_NAME} PRIVATE include) - target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc ${mi_libraries}) - - add_test(NAME test-${TEST_NAME} COMMAND mimalloc-test-${TEST_NAME}) - endforeach() -endif() - -# ----------------------------------------------------------------------------- -# Set override properties -# ----------------------------------------------------------------------------- -if (MI_OVERRIDE) - if (MI_BUILD_SHARED) - target_compile_definitions(mimalloc PRIVATE MI_MALLOC_OVERRIDE) - endif() - if(NOT WIN32) - # It is only possible to override malloc on Windows when building as a DLL. 
- if (MI_BUILD_STATIC) - target_compile_definitions(mimalloc-static PRIVATE MI_MALLOC_OVERRIDE) - endif() - if (MI_BUILD_OBJECT) - target_compile_definitions(mimalloc-obj PRIVATE MI_MALLOC_OVERRIDE) - endif() - endif() -endif() diff --git a/3rd/mimalloc-2.0.9/azure-pipelines.yml b/3rd/mimalloc-2.0.9/azure-pipelines.yml deleted file mode 100644 index e3639fef..00000000 --- a/3rd/mimalloc-2.0.9/azure-pipelines.yml +++ /dev/null @@ -1,181 +0,0 @@ -# Starter pipeline -# Start with a minimal pipeline that you can customize to build and deploy your code. -# Add steps that build, run tests, deploy, and more: -# https://aka.ms/yaml - -trigger: - branches: - include: - - master - - dev - - dev-slice - tags: - include: - - v* - -jobs: -- job: - displayName: Windows - pool: - vmImage: - windows-2022 - strategy: - matrix: - Debug: - BuildType: debug - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON - MSBuildConfiguration: Debug - Release: - BuildType: release - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release - MSBuildConfiguration: Release - Secure: - BuildType: secure - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON - MSBuildConfiguration: Release - steps: - - task: CMake@1 - inputs: - workingDirectory: $(BuildType) - cmakeArgs: .. $(cmakeExtraArgs) - - task: MSBuild@1 - inputs: - solution: $(BuildType)/libmimalloc.sln - configuration: '$(MSBuildConfiguration)' - msbuildArguments: -m - - script: ctest --verbose --timeout 120 -C $(MSBuildConfiguration) - workingDirectory: $(BuildType) - displayName: CTest - #- script: $(BuildType)\$(BuildType)\mimalloc-test-stress - # displayName: TestStress - #- upload: $(Build.SourcesDirectory)/$(BuildType) - # artifact: mimalloc-windows-$(BuildType) - -- job: - displayName: Linux - pool: - vmImage: - ubuntu-22.04 - strategy: - matrix: - Debug: - CC: gcc - CXX: g++ - BuildType: debug - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON - Release: - CC: gcc - CXX: g++ - BuildType: release - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release - Secure: - CC: gcc - CXX: g++ - BuildType: secure - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON - Debug++: - CC: gcc - CXX: g++ - BuildType: debug-cxx - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON - Debug Clang: - CC: clang - CXX: clang++ - BuildType: debug-clang - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON - Release Clang: - CC: clang - CXX: clang++ - BuildType: release-clang - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release - Secure Clang: - CC: clang - CXX: clang++ - BuildType: secure-clang - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON - Debug++ Clang: - CC: clang - CXX: clang++ - BuildType: debug-clang-cxx - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON - steps: - - task: CMake@1 - inputs: - workingDirectory: $(BuildType) - cmakeArgs: .. 
$(cmakeExtraArgs) - - script: make -j$(nproc) -C $(BuildType) - displayName: Make - - script: ctest --verbose --timeout 120 - workingDirectory: $(BuildType) - displayName: CTest -# - upload: $(Build.SourcesDirectory)/$(BuildType) -# artifact: mimalloc-ubuntu-$(BuildType) - -- job: - displayName: macOS - pool: - vmImage: - macOS-latest - strategy: - matrix: - Debug: - BuildType: debug - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON - Release: - BuildType: release - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release - Secure: - BuildType: secure - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON - steps: - - task: CMake@1 - inputs: - workingDirectory: $(BuildType) - cmakeArgs: .. $(cmakeExtraArgs) - - script: make -j$(sysctl -n hw.ncpu) -C $(BuildType) - displayName: Make - # - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-api - # workingDirectory: $(BuildType) - # displayName: TestAPI - # - script: MIMALLOC_VERBOSE=1 ./mimalloc-test-stress - # workingDirectory: $(BuildType) - # displayName: TestStress - - script: ctest --verbose --timeout 120 - workingDirectory: $(BuildType) - displayName: CTest - -# - upload: $(Build.SourcesDirectory)/$(BuildType) -# artifact: mimalloc-macos-$(BuildType) - -# - job: -# displayName: Windows-2017 -# pool: -# vmImage: -# vs2017-win2016 -# strategy: -# matrix: -# Debug: -# BuildType: debug -# cmakeExtraArgs: -A x64 -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -# MSBuildConfiguration: Debug -# Release: -# BuildType: release -# cmakeExtraArgs: -A x64 -DCMAKE_BUILD_TYPE=Release -# MSBuildConfiguration: Release -# Secure: -# BuildType: secure -# cmakeExtraArgs: -A x64 -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON -# MSBuildConfiguration: Release -# steps: -# - task: CMake@1 -# inputs: -# workingDirectory: $(BuildType) -# cmakeArgs: .. 
$(cmakeExtraArgs) -# - task: MSBuild@1 -# inputs: -# solution: $(BuildType)/libmimalloc.sln -# configuration: '$(MSBuildConfiguration)' -# - script: | -# cd $(BuildType) -# ctest --verbose --timeout 120 -# displayName: CTest diff --git a/3rd/mimalloc-2.0.9/ide/vs2017/mimalloc-override-test.vcxproj b/3rd/mimalloc-2.0.9/ide/vs2017/mimalloc-override-test.vcxproj deleted file mode 100644 index faaa00e3..00000000 --- a/3rd/mimalloc-2.0.9/ide/vs2017/mimalloc-override-test.vcxproj +++ /dev/null @@ -1,190 +0,0 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - Debug - x64 - - - Release - x64 - - - - 15.0 - {FEF7868F-750E-4C21-A04D-22707CC66879} - mimalloc-override-test - 10.0.17134.0 - mimalloc-override-test - - - - Application - true - v141 - - - Application - false - v141 - true - - - Application - true - v141 - - - Application - false - v141 - true - - - - - - - - - - - - - - - - - - - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - - Level3 - Disabled - true - true - ..\..\include - MultiThreadedDebugDLL - false - Default - false - - - Console - kernel32.lib;%(AdditionalDependencies) - - - - - - - - - - Level3 - Disabled - true - true - ..\..\include - MultiThreadedDebugDLL - Sync - Default - false - - - Console - - - kernel32.lib;%(AdditionalDependencies) - - - - - - - - - - Level3 - MaxSpeed - true - true - true - true - ..\..\include - _MBCS;%(PreprocessorDefinitions);NDEBUG - MultiThreadedDLL - - - true - true - Console - kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - - - - - - - - - Level3 - MaxSpeed - true - true - true - true - ..\..\include - _MBCS;%(PreprocessorDefinitions);NDEBUG - MultiThreadedDLL - - - true - true - Console - - - kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - - - - - - - - - {abb5eae7-b3e6-432e-b636-333449892ea7} - - - - - - - - - \ No newline at end of file diff --git a/3rd/mimalloc-2.0.9/ide/vs2017/mimalloc-override.vcxproj b/3rd/mimalloc-2.0.9/ide/vs2017/mimalloc-override.vcxproj deleted file mode 100644 index 6fdd83d9..00000000 --- a/3rd/mimalloc-2.0.9/ide/vs2017/mimalloc-override.vcxproj +++ /dev/null @@ -1,256 +0,0 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - Debug - x64 - - - Release - x64 - - - - 15.0 - {ABB5EAE7-B3E6-432E-B636-333449892EA7} - mimalloc-override - 10.0.17134.0 - mimalloc-override - - - - DynamicLibrary - true - v141 - - - DynamicLibrary - false - v141 - - - DynamicLibrary - true - v141 - - - DynamicLibrary - false - v141 - - - - - - - - - - - - - - - - - - - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .dll - mimalloc-override - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .dll - mimalloc-override 
- - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .dll - mimalloc-override - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .dll - mimalloc-override - - - - Level3 - Disabled - true - true - ../../include - _CRT_SECURE_NO_WARNINGS;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); - MultiThreadedDebugDLL - false - Default - - - $(ProjectDir)\..\..\bin\mimalloc-redirect32.lib;%(AdditionalDependencies) - - - - - Default - false - - - COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect32.dll" "$(OutputPath)" - - - Copy mimalloc-redirect32.dll to the output directory - - - - - Level3 - Disabled - true - true - ../../include - _CRT_SECURE_NO_WARNINGS;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); - MultiThreadedDebugDLL - false - Default - - - $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies) - - - - - Default - false - - - COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect.dll" "$(OutputPath)" - - - copy mimalloc-redirect.dll to the output directory - - - - - Level3 - MaxSpeed - true - true - true - ../../include - _CRT_SECURE_NO_WARNINGS;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);NDEBUG - AssemblyAndSourceCode - $(IntDir) - false - MultiThreadedDLL - Default - false - - - true - true - $(ProjectDir)\..\..\bin\mimalloc-redirect32.lib;%(AdditionalDependencies) - - - Default - false - - - COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect32.dll" "$(OutputPath)" - - - Copy mimalloc-redirect32.dll to the output directory - - - - - Level3 - MaxSpeed - true - true - true - ../../include - _CRT_SECURE_NO_WARNINGS;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);NDEBUG - AssemblyAndSourceCode - $(IntDir) - false - MultiThreadedDLL - Default - false - - - true - true - $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies) - - - Default - false - - - COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect.dll" "$(OutputPath)" - - - copy mimalloc-redirect.dll to the output directory - - - - - - - - - - - - - - false - false - false - false - - - true - true - true - true - - - - - - - - - - - true - true - true - true - - - - - - - - - - - \ No newline at end of file diff --git a/3rd/mimalloc-2.0.9/ide/vs2017/mimalloc-test-stress.vcxproj b/3rd/mimalloc-2.0.9/ide/vs2017/mimalloc-test-stress.vcxproj deleted file mode 100644 index b8267d0b..00000000 --- a/3rd/mimalloc-2.0.9/ide/vs2017/mimalloc-test-stress.vcxproj +++ /dev/null @@ -1,159 +0,0 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - Debug - x64 - - - Release - x64 - - - - 15.0 - {FEF7958F-750E-4C21-A04D-22707CC66878} - mimalloc-test-stress - 10.0.17134.0 - mimalloc-test-stress - - - - Application - true - v141 - - - Application - false - v141 - true - - - Application - true - v141 - - - Application - false - v141 - true - - - - - - - - - - - - - - - - - - - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - 
$(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - - Level3 - Disabled - true - true - ..\..\include - - - Console - - - - - Level3 - Disabled - true - true - ..\..\include - - - Console - - - - - Level3 - MaxSpeed - true - true - true - true - ..\..\include - %(PreprocessorDefinitions);NDEBUG - - - true - true - Console - - - - - Level3 - MaxSpeed - true - true - true - true - ..\..\include - %(PreprocessorDefinitions);NDEBUG - - - true - true - Console - - - - - false - false - false - false - - - - - {abb5eae7-b3e6-432e-b636-333449892ea6} - - - - - - \ No newline at end of file diff --git a/3rd/mimalloc-2.0.9/ide/vs2017/mimalloc-test.vcxproj b/3rd/mimalloc-2.0.9/ide/vs2017/mimalloc-test.vcxproj deleted file mode 100644 index 27c7bb6e..00000000 --- a/3rd/mimalloc-2.0.9/ide/vs2017/mimalloc-test.vcxproj +++ /dev/null @@ -1,158 +0,0 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - Debug - x64 - - - Release - x64 - - - - 15.0 - {FEF7858F-750E-4C21-A04D-22707CC66878} - mimalloctest - 10.0.17134.0 - mimalloc-test - - - - Application - true - v141 - - - Application - false - v141 - true - - - Application - true - v141 - - - Application - false - v141 - true - - - - - - - - - - - - - - - - - - - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - - Level3 - Disabled - true - true - ..\..\include - stdcpp17 - - - Console - - - - - Level3 - Disabled - true - true - ..\..\include - stdcpp17 - - - Console - - - - - Level3 - MaxSpeed - true - true - true - true - ..\..\include - _MBCS;%(PreprocessorDefinitions);NDEBUG - stdcpp17 - - - true - true - Console - - - - - Level3 - MaxSpeed - true - true - true - true - ..\..\include - _MBCS;%(PreprocessorDefinitions);NDEBUG - stdcpp17 - - - true - true - Console - - - - - {abb5eae7-b3e6-432e-b636-333449892ea6} - - - - - - - - - \ No newline at end of file diff --git a/3rd/mimalloc-2.0.9/ide/vs2017/mimalloc.sln b/3rd/mimalloc-2.0.9/ide/vs2017/mimalloc.sln deleted file mode 100644 index 7dbf53e1..00000000 --- a/3rd/mimalloc-2.0.9/ide/vs2017/mimalloc.sln +++ /dev/null @@ -1,71 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.28010.2016 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc", "mimalloc.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA6}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-test", "mimalloc-test.vcxproj", "{FEF7858F-750E-4C21-A04D-22707CC66878}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-override", "mimalloc-override.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA7}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-override-test", "mimalloc-override-test.vcxproj", "{FEF7868F-750E-4C21-A04D-22707CC66879}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-test-stress", "mimalloc-test-stress.vcxproj", 
"{FEF7958F-750E-4C21-A04D-22707CC66878}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|x64 = Debug|x64 - Debug|x86 = Debug|x86 - Release|x64 = Release|x64 - Release|x86 = Release|x86 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x64.ActiveCfg = Debug|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x64.Build.0 = Debug|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x86.ActiveCfg = Debug|Win32 - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x86.Build.0 = Debug|Win32 - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x64.ActiveCfg = Release|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x64.Build.0 = Release|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x86.ActiveCfg = Release|Win32 - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x86.Build.0 = Release|Win32 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x64.ActiveCfg = Debug|x64 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x64.Build.0 = Debug|x64 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x86.ActiveCfg = Debug|Win32 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x86.Build.0 = Debug|Win32 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x64.ActiveCfg = Release|x64 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x64.Build.0 = Release|x64 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x86.ActiveCfg = Release|Win32 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x86.Build.0 = Release|Win32 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x64.ActiveCfg = Debug|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x64.Build.0 = Debug|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x86.ActiveCfg = Debug|Win32 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x86.Build.0 = Debug|Win32 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x64.ActiveCfg = Release|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x64.Build.0 = Release|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x86.ActiveCfg = Release|Win32 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x86.Build.0 = Release|Win32 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x64.ActiveCfg = Debug|x64 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x64.Build.0 = Debug|x64 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x86.ActiveCfg = Debug|Win32 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x86.Build.0 = Debug|Win32 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x64.ActiveCfg = Release|x64 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x64.Build.0 = Release|x64 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x86.ActiveCfg = Release|Win32 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x86.Build.0 = Release|Win32 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.ActiveCfg = Debug|x64 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.Build.0 = Debug|x64 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.ActiveCfg = Debug|Win32 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.Build.0 = Debug|Win32 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.ActiveCfg = Release|x64 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.Build.0 = Release|x64 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.ActiveCfg = Release|Win32 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.Build.0 = Release|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {4297F93D-486A-4243-995F-7D32F59AE82A} - EndGlobalSection 
-EndGlobal diff --git a/3rd/mimalloc-2.0.9/ide/vs2017/mimalloc.vcxproj b/3rd/mimalloc-2.0.9/ide/vs2017/mimalloc.vcxproj deleted file mode 100644 index 41fb77c1..00000000 --- a/3rd/mimalloc-2.0.9/ide/vs2017/mimalloc.vcxproj +++ /dev/null @@ -1,262 +0,0 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - Debug - x64 - - - Release - x64 - - - - 15.0 - {ABB5EAE7-B3E6-432E-B636-333449892EA6} - mimalloc - 10.0.17134.0 - mimalloc - - - - StaticLibrary - true - v141 - - - StaticLibrary - false - v141 - true - - - StaticLibrary - true - v141 - - - StaticLibrary - false - v141 - true - - - - - - - - - - - - - - - - - - - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .lib - mimalloc-static - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .lib - mimalloc-static - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .lib - mimalloc-static - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .lib - mimalloc-static - - - false - - - false - - - false - - - false - - - - Level3 - Disabled - true - true - ../../include - _CRT_SECURE_NO_WARNINGS;MI_DEBUG=3;%(PreprocessorDefinitions); - CompileAsC - false - stdcpp17 - - - - - - - - - - - Level4 - Disabled - true - true - ../../include - _CRT_SECURE_NO_WARNINGS;MI_DEBUG=3;%(PreprocessorDefinitions); - CompileAsC - false - stdcpp17 - - - - - - - - - - - - - - - - - - - Level3 - MaxSpeed - true - true - ../../include - _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions);NDEBUG - AssemblyAndSourceCode - $(IntDir) - false - false - Default - CompileAsC - true - - - true - true - - - - - - - - - - - Level4 - MaxSpeed - true - true - ../../include - _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions);NDEBUG - AssemblyAndSourceCode - $(IntDir) - false - false - Default - CompileAsC - true - - - true - true - - - - - - - - - - - - - - - - - false - false - false - false - - - true - true - true - true - - - true - true - true - true - - - - - - - - - - true - true - true - true - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/3rd/mimalloc-2.0.9/ide/vs2019/mimalloc-override-test.vcxproj b/3rd/mimalloc-2.0.9/ide/vs2019/mimalloc-override-test.vcxproj deleted file mode 100644 index 7a9202f1..00000000 --- a/3rd/mimalloc-2.0.9/ide/vs2019/mimalloc-override-test.vcxproj +++ /dev/null @@ -1,190 +0,0 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - Debug - x64 - - - Release - x64 - - - - 15.0 - {FEF7868F-750E-4C21-A04D-22707CC66879} - mimalloc-override-test - 10.0 - mimalloc-override-test - - - - Application - true - v142 - - - Application - false - v142 - true - - - Application - true - v142 - - - Application - false - v142 - true - - - - - - - - - - - - - - - - - - - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - 
$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - - Level3 - Disabled - true - true - ..\..\include - MultiThreadedDebugDLL - Sync - Default - false - - - Console - kernel32.lib;%(AdditionalDependencies) - - - - - - - - - - Level3 - Disabled - true - true - ..\..\include - MultiThreadedDebugDLL - Sync - Default - false - - - Console - - - kernel32.lib;%(AdditionalDependencies) - - - - - - - - - - Level3 - MaxSpeed - true - true - true - true - ..\..\include - _MBCS;%(PreprocessorDefinitions);NDEBUG - MultiThreadedDLL - - - true - true - Console - kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - - - - - - - - - Level3 - MaxSpeed - true - true - true - true - ..\..\include - _MBCS;%(PreprocessorDefinitions);NDEBUG - MultiThreadedDLL - - - true - true - Console - - - kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - - - - - - - - - - - - {abb5eae7-b3e6-432e-b636-333449892ea7} - - - - - - \ No newline at end of file diff --git a/3rd/mimalloc-2.0.9/ide/vs2019/mimalloc-override.vcxproj b/3rd/mimalloc-2.0.9/ide/vs2019/mimalloc-override.vcxproj deleted file mode 100644 index 4c4235b4..00000000 --- a/3rd/mimalloc-2.0.9/ide/vs2019/mimalloc-override.vcxproj +++ /dev/null @@ -1,256 +0,0 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - Debug - x64 - - - Release - x64 - - - - 15.0 - {ABB5EAE7-B3E6-432E-B636-333449892EA7} - mimalloc-override - 10.0 - mimalloc-override - - - - DynamicLibrary - true - v142 - - - DynamicLibrary - false - v142 - - - DynamicLibrary - true - v142 - - - DynamicLibrary - false - v142 - - - - - - - - - - - - - - - - - - - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .dll - mimalloc-override - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .dll - mimalloc-override - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .dll - mimalloc-override - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .dll - mimalloc-override - - - - Level3 - Disabled - true - true - ../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); - MultiThreadedDebugDLL - false - Default - - - $(ProjectDir)\..\..\bin\mimalloc-redirect32.lib;%(AdditionalDependencies) - - - - - Default - false - - - COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect32.dll" "$(OutputPath)" - - - Copy mimalloc-redirect32.dll to the output directory - - - - - Level3 - Disabled - true - true - ../../include - MI_DEBUG=3;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); - MultiThreadedDebugDLL - false - Default - - - $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies) - - - - - Default - false - - - COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect.dll" "$(OutputPath)" - - - copy mimalloc-redirect.dll to the output directory - - - - - Level3 - MaxSpeed - true - true - true - ../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);NDEBUG - AssemblyAndSourceCode - $(IntDir) - 
false - MultiThreadedDLL - Default - false - - - true - true - $(ProjectDir)\..\..\bin\mimalloc-redirect32.lib;%(AdditionalDependencies) - - - Default - false - - - COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect32.dll" "$(OutputPath)" - - - Copy mimalloc-redirect32.dll to the output directory - - - - - Level3 - MaxSpeed - true - true - true - ../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);NDEBUG - AssemblyAndSourceCode - $(IntDir) - false - MultiThreadedDLL - Default - false - - - true - true - $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies) - - - Default - false - - - COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect.dll" "$(OutputPath)" - - - copy mimalloc-redirect.dll to the output directory - - - - - - - - - - - - - - false - false - false - false - - - true - true - true - true - - - - - - - - - - - true - true - true - true - - - - - - - - - - - \ No newline at end of file diff --git a/3rd/mimalloc-2.0.9/ide/vs2019/mimalloc-test-api.vcxproj b/3rd/mimalloc-2.0.9/ide/vs2019/mimalloc-test-api.vcxproj deleted file mode 100644 index 812a9cb1..00000000 --- a/3rd/mimalloc-2.0.9/ide/vs2019/mimalloc-test-api.vcxproj +++ /dev/null @@ -1,155 +0,0 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - Debug - x64 - - - Release - x64 - - - - 15.0 - {FFF7958F-750E-4C21-A04D-22707CC66878} - mimalloc-test-api - 10.0 - mimalloc-test-api - - - - Application - true - v142 - - - Application - false - v142 - true - - - Application - true - v142 - - - Application - false - v142 - true - - - - - - - - - - - - - - - - - - - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - - Level3 - Disabled - true - true - ..\..\include - - - Console - - - - - Level3 - Disabled - true - true - ..\..\include - - - Console - - - - - Level3 - MaxSpeed - true - true - true - true - ..\..\include - %(PreprocessorDefinitions);NDEBUG - - - true - true - Console - - - - - Level3 - MaxSpeed - true - true - true - true - ..\..\include - %(PreprocessorDefinitions);NDEBUG - - - true - true - Console - - - - - - - - - {abb5eae7-b3e6-432e-b636-333449892ea6} - - - - - - diff --git a/3rd/mimalloc-2.0.9/ide/vs2019/mimalloc-test-stress.vcxproj b/3rd/mimalloc-2.0.9/ide/vs2019/mimalloc-test-stress.vcxproj deleted file mode 100644 index ef7ab357..00000000 --- a/3rd/mimalloc-2.0.9/ide/vs2019/mimalloc-test-stress.vcxproj +++ /dev/null @@ -1,159 +0,0 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - Debug - x64 - - - Release - x64 - - - - 15.0 - {FEF7958F-750E-4C21-A04D-22707CC66878} - mimalloc-test-stress - 10.0 - mimalloc-test-stress - - - - Application - true - v142 - - - Application - false - v142 - true - - - Application - true - v142 - - - Application - false - v142 - true - - - - - - - - - - - - - - - - - - - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - 
$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - - Level3 - Disabled - true - true - ..\..\include - - - Console - - - - - Level3 - Disabled - true - true - ..\..\include - - - Console - - - - - Level3 - MaxSpeed - true - true - true - true - ..\..\include - %(PreprocessorDefinitions);NDEBUG - - - true - true - Console - - - - - Level3 - MaxSpeed - true - true - true - true - ..\..\include - %(PreprocessorDefinitions);NDEBUG - - - true - true - Console - - - - - false - false - false - false - - - - - {abb5eae7-b3e6-432e-b636-333449892ea6} - - - - - - \ No newline at end of file diff --git a/3rd/mimalloc-2.0.9/ide/vs2019/mimalloc-test.vcxproj b/3rd/mimalloc-2.0.9/ide/vs2019/mimalloc-test.vcxproj deleted file mode 100644 index 13af6ab4..00000000 --- a/3rd/mimalloc-2.0.9/ide/vs2019/mimalloc-test.vcxproj +++ /dev/null @@ -1,158 +0,0 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - Debug - x64 - - - Release - x64 - - - - 15.0 - {FEF7858F-750E-4C21-A04D-22707CC66878} - mimalloctest - 10.0 - mimalloc-test - - - - Application - true - v142 - - - Application - false - v142 - true - - - Application - true - v142 - - - Application - false - v142 - true - - - - - - - - - - - - - - - - - - - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - - Level3 - Disabled - true - true - ..\..\include - stdcpp17 - - - Console - - - - - Level3 - Disabled - true - true - ..\..\include - stdcpp17 - - - Console - - - - - Level3 - MaxSpeed - true - true - true - true - ..\..\include - _MBCS;%(PreprocessorDefinitions);NDEBUG - stdcpp17 - - - true - true - Console - - - - - Level3 - MaxSpeed - true - true - true - true - ..\..\include - _MBCS;%(PreprocessorDefinitions);NDEBUG - stdcpp17 - - - true - true - Console - - - - - {abb5eae7-b3e6-432e-b636-333449892ea6} - - - - - - - - - \ No newline at end of file diff --git a/3rd/mimalloc-2.0.9/ide/vs2019/mimalloc.sln b/3rd/mimalloc-2.0.9/ide/vs2019/mimalloc.sln deleted file mode 100644 index 6ff01d3b..00000000 --- a/3rd/mimalloc-2.0.9/ide/vs2019/mimalloc.sln +++ /dev/null @@ -1,81 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 16 -VisualStudioVersion = 16.0.29709.97 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc", "mimalloc.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA6}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-test", "mimalloc-test.vcxproj", "{FEF7858F-750E-4C21-A04D-22707CC66878}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-override", "mimalloc-override.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA7}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-override-test", 
"mimalloc-override-test.vcxproj", "{FEF7868F-750E-4C21-A04D-22707CC66879}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-test-stress", "mimalloc-test-stress.vcxproj", "{FEF7958F-750E-4C21-A04D-22707CC66878}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-test-api", "mimalloc-test-api.vcxproj", "{FFF7958F-750E-4C21-A04D-22707CC66878}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|x64 = Debug|x64 - Debug|x86 = Debug|x86 - Release|x64 = Release|x64 - Release|x86 = Release|x86 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x64.ActiveCfg = Debug|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x64.Build.0 = Debug|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x86.ActiveCfg = Debug|Win32 - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x86.Build.0 = Debug|Win32 - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x64.ActiveCfg = Release|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x64.Build.0 = Release|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x86.ActiveCfg = Release|Win32 - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x86.Build.0 = Release|Win32 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x64.ActiveCfg = Debug|x64 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x64.Build.0 = Debug|x64 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x86.ActiveCfg = Debug|Win32 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x86.Build.0 = Debug|Win32 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x64.ActiveCfg = Release|x64 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x64.Build.0 = Release|x64 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x86.ActiveCfg = Release|Win32 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x86.Build.0 = Release|Win32 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x64.ActiveCfg = Debug|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x64.Build.0 = Debug|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x86.ActiveCfg = Debug|Win32 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x86.Build.0 = Debug|Win32 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x64.ActiveCfg = Release|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x64.Build.0 = Release|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x86.ActiveCfg = Release|Win32 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x86.Build.0 = Release|Win32 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x64.ActiveCfg = Debug|x64 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x64.Build.0 = Debug|x64 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x86.ActiveCfg = Debug|Win32 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x86.Build.0 = Debug|Win32 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x64.ActiveCfg = Release|x64 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x64.Build.0 = Release|x64 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x86.ActiveCfg = Release|Win32 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x86.Build.0 = Release|Win32 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.ActiveCfg = Debug|x64 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.Build.0 = Debug|x64 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.ActiveCfg = Debug|Win32 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.Build.0 = Debug|Win32 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.ActiveCfg = Release|x64 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.Build.0 = Release|x64 - 
{FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.ActiveCfg = Release|Win32 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.Build.0 = Release|Win32 - {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.ActiveCfg = Debug|x64 - {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.Build.0 = Debug|x64 - {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.ActiveCfg = Debug|Win32 - {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.Build.0 = Debug|Win32 - {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.ActiveCfg = Release|x64 - {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.Build.0 = Release|x64 - {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.ActiveCfg = Release|Win32 - {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.Build.0 = Release|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {4297F93D-486A-4243-995F-7D32F59AE82A} - EndGlobalSection -EndGlobal diff --git a/3rd/mimalloc-2.0.9/ide/vs2019/mimalloc.vcxproj b/3rd/mimalloc-2.0.9/ide/vs2019/mimalloc.vcxproj deleted file mode 100644 index 9f967d94..00000000 --- a/3rd/mimalloc-2.0.9/ide/vs2019/mimalloc.vcxproj +++ /dev/null @@ -1,254 +0,0 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - Debug - x64 - - - Release - x64 - - - - 15.0 - {ABB5EAE7-B3E6-432E-B636-333449892EA6} - mimalloc - 10.0 - mimalloc - - - - StaticLibrary - true - v142 - - - StaticLibrary - false - v142 - true - - - StaticLibrary - true - v142 - - - StaticLibrary - false - v142 - true - - - - - - - - - - - - - - - - - - - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .lib - mimalloc-static - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .lib - mimalloc-static - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .lib - mimalloc-static - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .lib - mimalloc-static - - - - Level4 - Disabled - true - true - ../../include - MI_DEBUG=3;%(PreprocessorDefinitions); - CompileAsCpp - false - Default - - - - - - - - - - - Level4 - Disabled - true - true - ../../include - MI_DEBUG=3;%(PreprocessorDefinitions); - CompileAsCpp - false - Default - - - - - - - - - - - - - - - - - - - Level4 - MaxSpeed - true - true - ../../include - %(PreprocessorDefinitions);NDEBUG - AssemblyAndSourceCode - $(IntDir) - false - false - Default - CompileAsCpp - true - Default - - - true - true - - - - - - - - - - - Level4 - MaxSpeed - true - true - ../../include - %(PreprocessorDefinitions);NDEBUG - AssemblyAndSourceCode - $(IntDir) - false - false - Default - CompileAsCpp - true - Default - - - true - true - - - - - - - - - - - - - - - - - false - false - false - false - - - true - true - true - true - - - true - true - true - true - - - - - - false - - - - - - true - true - true - true - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/3rd/mimalloc-2.0.9/ide/vs2022/mimalloc-override.vcxproj b/3rd/mimalloc-2.0.9/ide/vs2022/mimalloc-override.vcxproj deleted file mode 100644 index 54964d96..00000000 --- a/3rd/mimalloc-2.0.9/ide/vs2022/mimalloc-override.vcxproj +++ /dev/null @@ -1,257 +0,0 @@ - - - - - Debug - Win32 - - - 
Release - Win32 - - - Debug - x64 - - - Release - x64 - - - - 15.0 - {ABB5EAE7-B3E6-432E-B636-333449892EA7} - mimalloc-override - 10.0 - mimalloc-override - - - - DynamicLibrary - true - v143 - - - DynamicLibrary - false - v143 - - - DynamicLibrary - true - v143 - - - DynamicLibrary - false - v143 - - - - - - - - - - - - - - - - - - - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .dll - mimalloc-override - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .dll - mimalloc-override - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .dll - mimalloc-override - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .dll - mimalloc-override - - - - Level3 - Disabled - true - true - ../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); - MultiThreadedDebugDLL - false - Default - - - $(ProjectDir)\..\..\bin\mimalloc-redirect32.lib;%(AdditionalDependencies) - - - - - Default - false - - - COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect32.dll" "$(OutputPath)" - - - Copy mimalloc-redirect32.dll to the output directory - - - - - Level3 - Disabled - true - true - ../../include - MI_DEBUG=4;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); - MultiThreadedDebugDLL - false - Default - - - $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies) - - - - - Default - false - - - COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect.dll" "$(OutputPath)" - - - copy mimalloc-redirect.dll to the output directory - - - - - Level3 - MaxSpeed - true - true - true - ../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);NDEBUG - AssemblyAndSourceCode - $(IntDir) - false - MultiThreadedDLL - Default - false - - - true - true - $(ProjectDir)\..\..\bin\mimalloc-redirect32.lib;%(AdditionalDependencies) - - - Default - false - - - COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect32.dll" "$(OutputPath)" - - - Copy mimalloc-redirect32.dll to the output directory - - - - - Level3 - MaxSpeed - true - true - true - ../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);NDEBUG - AssemblyAndSourceCode - $(IntDir) - false - MultiThreadedDLL - Default - false - - - true - true - $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies) - - - Default - false - - - COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect.dll" "$(OutputPath)" - - - copy mimalloc-redirect.dll to the output directory - - - - - - - - - - - - - - - false - false - false - false - - - true - true - true - true - - - - - - - - - - - true - true - true - true - - - - - - - - - - - \ No newline at end of file diff --git a/3rd/mimalloc-2.0.9/ide/vs2022/mimalloc-test-api.vcxproj b/3rd/mimalloc-2.0.9/ide/vs2022/mimalloc-test-api.vcxproj deleted file mode 100644 index 6023c251..00000000 --- a/3rd/mimalloc-2.0.9/ide/vs2022/mimalloc-test-api.vcxproj +++ /dev/null @@ -1,155 +0,0 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - Debug - x64 - - - Release - x64 - - - - 15.0 - {FFF7958F-750E-4C21-A04D-22707CC66878} - mimalloc-test-api - 10.0 - mimalloc-test-api - - - - Application - true - v143 - - - Application - false - v143 - 
true - - - Application - true - v143 - - - Application - false - v143 - true - - - - - - - - - - - - - - - - - - - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - - - - Level3 - Disabled - true - true - ..\..\include - - - Console - - - - - Level3 - Disabled - true - true - ..\..\include - - - Console - - - - - Level3 - MaxSpeed - true - true - true - true - ..\..\include - %(PreprocessorDefinitions);NDEBUG - - - true - true - Console - - - - - Level3 - MaxSpeed - true - true - true - true - ..\..\include - %(PreprocessorDefinitions);NDEBUG - - - true - true - Console - - - - - - - - - {abb5eae7-b3e6-432e-b636-333449892ea6} - - - - - - \ No newline at end of file diff --git a/3rd/mimalloc-2.0.9/ide/vs2022/mimalloc.sln b/3rd/mimalloc-2.0.9/ide/vs2022/mimalloc.sln deleted file mode 100644 index 6ff01d3b..00000000 --- a/3rd/mimalloc-2.0.9/ide/vs2022/mimalloc.sln +++ /dev/null @@ -1,81 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 16 -VisualStudioVersion = 16.0.29709.97 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc", "mimalloc.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA6}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-test", "mimalloc-test.vcxproj", "{FEF7858F-750E-4C21-A04D-22707CC66878}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-override", "mimalloc-override.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA7}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-override-test", "mimalloc-override-test.vcxproj", "{FEF7868F-750E-4C21-A04D-22707CC66879}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-test-stress", "mimalloc-test-stress.vcxproj", "{FEF7958F-750E-4C21-A04D-22707CC66878}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-test-api", "mimalloc-test-api.vcxproj", "{FFF7958F-750E-4C21-A04D-22707CC66878}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|x64 = Debug|x64 - Debug|x86 = Debug|x86 - Release|x64 = Release|x64 - Release|x86 = Release|x86 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x64.ActiveCfg = Debug|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x64.Build.0 = Debug|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x86.ActiveCfg = Debug|Win32 - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x86.Build.0 = Debug|Win32 - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x64.ActiveCfg = Release|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x64.Build.0 = Release|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x86.ActiveCfg = Release|Win32 - {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x86.Build.0 = Release|Win32 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x64.ActiveCfg = Debug|x64 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x64.Build.0 = Debug|x64 - 
{FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x86.ActiveCfg = Debug|Win32 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x86.Build.0 = Debug|Win32 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x64.ActiveCfg = Release|x64 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x64.Build.0 = Release|x64 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x86.ActiveCfg = Release|Win32 - {FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x86.Build.0 = Release|Win32 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x64.ActiveCfg = Debug|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x64.Build.0 = Debug|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x86.ActiveCfg = Debug|Win32 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x86.Build.0 = Debug|Win32 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x64.ActiveCfg = Release|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x64.Build.0 = Release|x64 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x86.ActiveCfg = Release|Win32 - {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x86.Build.0 = Release|Win32 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x64.ActiveCfg = Debug|x64 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x64.Build.0 = Debug|x64 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x86.ActiveCfg = Debug|Win32 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x86.Build.0 = Debug|Win32 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x64.ActiveCfg = Release|x64 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x64.Build.0 = Release|x64 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x86.ActiveCfg = Release|Win32 - {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x86.Build.0 = Release|Win32 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.ActiveCfg = Debug|x64 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.Build.0 = Debug|x64 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.ActiveCfg = Debug|Win32 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.Build.0 = Debug|Win32 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.ActiveCfg = Release|x64 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.Build.0 = Release|x64 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.ActiveCfg = Release|Win32 - {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.Build.0 = Release|Win32 - {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.ActiveCfg = Debug|x64 - {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.Build.0 = Debug|x64 - {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.ActiveCfg = Debug|Win32 - {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.Build.0 = Debug|Win32 - {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.ActiveCfg = Release|x64 - {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.Build.0 = Release|x64 - {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.ActiveCfg = Release|Win32 - {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.Build.0 = Release|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {4297F93D-486A-4243-995F-7D32F59AE82A} - EndGlobalSection -EndGlobal diff --git a/3rd/mimalloc-2.0.9/ide/vs2022/mimalloc.vcxproj b/3rd/mimalloc-2.0.9/ide/vs2022/mimalloc.vcxproj deleted file mode 100644 index 9811aa55..00000000 --- a/3rd/mimalloc-2.0.9/ide/vs2022/mimalloc.vcxproj +++ /dev/null @@ -1,255 +0,0 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - Debug - x64 - - - Release - x64 - - - - 15.0 - {ABB5EAE7-B3E6-432E-B636-333449892EA6} - mimalloc - 10.0 - mimalloc - - - - StaticLibrary - true - v143 
- - - StaticLibrary - false - v143 - true - - - StaticLibrary - true - v143 - - - StaticLibrary - false - v143 - true - - - - - - - - - - - - - - - - - - - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .lib - mimalloc-static - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .lib - mimalloc-static - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .lib - mimalloc-static - - - $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ - $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - .lib - mimalloc-static - - - - Level4 - Disabled - true - true - ../../include - MI_DEBUG=3;%(PreprocessorDefinitions); - CompileAsCpp - false - Default - - - - - - - - - - - Level4 - Disabled - true - true - ../../include - MI_DEBUG=4;MI_SECURE=0;%(PreprocessorDefinitions); - CompileAsCpp - false - stdcpp20 - - - - - - - - - - - - - - - - - - - Level4 - MaxSpeed - true - true - ../../include - %(PreprocessorDefinitions);NDEBUG - AssemblyAndSourceCode - $(IntDir) - false - false - Default - CompileAsCpp - true - Default - - - true - true - - - - - - - - - - - Level4 - MaxSpeed - true - true - ../../include - %(PreprocessorDefinitions);NDEBUG - AssemblyAndSourceCode - $(IntDir) - false - false - Default - CompileAsCpp - true - stdcpp20 - - - true - true - - - - - - - - - - - - - - - - - false - false - false - false - - - true - true - true - true - - - true - true - true - true - - - - - - false - - - - - - true - true - true - true - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/3rd/mimalloc-2.0.9/include/mimalloc-internal.h b/3rd/mimalloc-2.0.9/include/mimalloc-internal.h deleted file mode 100644 index a68e6966..00000000 --- a/3rd/mimalloc-2.0.9/include/mimalloc-internal.h +++ /dev/null @@ -1,1116 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2018-2022, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ -#pragma once -#ifndef MIMALLOC_INTERNAL_H -#define MIMALLOC_INTERNAL_H - -#include "mimalloc-types.h" -#include "mimalloc-track.h" - -#if (MI_DEBUG>0) -#define mi_trace_message(...) _mi_trace_message(__VA_ARGS__) -#else -#define mi_trace_message(...) 
-#endif - -#define MI_CACHE_LINE 64 -#if defined(_MSC_VER) -#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) -#pragma warning(disable:26812) // unscoped enum warning -#define mi_decl_noinline __declspec(noinline) -#define mi_decl_thread __declspec(thread) -#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) -#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc -#define mi_decl_noinline __attribute__((noinline)) -#define mi_decl_thread __thread -#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) -#else -#define mi_decl_noinline -#define mi_decl_thread __thread // hope for the best :-) -#define mi_decl_cache_align -#endif - -#if defined(__EMSCRIPTEN__) && !defined(__wasi__) -#define __wasi__ -#endif - -#if defined(__cplusplus) -#define mi_decl_externc extern "C" -#else -#define mi_decl_externc -#endif - -#if !defined(_WIN32) && !defined(__wasi__) -#define MI_USE_PTHREADS -#include -#endif - -// "options.c" -void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message); -void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...); -void _mi_warning_message(const char* fmt, ...); -void _mi_verbose_message(const char* fmt, ...); -void _mi_trace_message(const char* fmt, ...); -void _mi_options_init(void); -void _mi_error_message(int err, const char* fmt, ...); - -// random.c -void _mi_random_init(mi_random_ctx_t* ctx); -void _mi_random_init_weak(mi_random_ctx_t* ctx); -void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx); -void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx); -uintptr_t _mi_random_next(mi_random_ctx_t* ctx); -uintptr_t _mi_heap_random_next(mi_heap_t* heap); -uintptr_t _mi_os_random_weak(uintptr_t extra_seed); -static inline uintptr_t _mi_random_shuffle(uintptr_t x); - -// init.c -extern mi_decl_cache_align mi_stats_t _mi_stats_main; -extern mi_decl_cache_align const mi_page_t _mi_page_empty; -bool _mi_is_main_thread(void); -size_t _mi_current_thread_count(void); -bool _mi_preloading(void); // true while the C runtime is not ready - -// os.c -size_t _mi_os_page_size(void); -void _mi_os_init(void); // called from process init -void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data -void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data - -bool _mi_os_protect(void* addr, size_t size); -bool _mi_os_unprotect(void* addr, size_t size); -bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); -// bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -size_t _mi_os_good_alloc_size(size_t size); -bool _mi_os_has_overcommit(void); -bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats); - -void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, mi_stats_t* tld_stats); -void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats); - -// arena.c -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, 
size_t* memid, mi_os_tld_t* tld); -void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats); -mi_arena_id_t _mi_arena_id_none(void); -bool _mi_arena_memid_is_suitable(size_t memid, mi_arena_id_t req_arena_id); - -// "segment-cache.c" -void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); -bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); -void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld); -void _mi_segment_cache_free_all(mi_os_tld_t* tld); -void _mi_segment_map_allocated_at(const mi_segment_t* segment); -void _mi_segment_map_freed_at(const mi_segment_t* segment); - -// "segment.c" -mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); -void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld); -void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); -bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); -void _mi_segment_thread_collect(mi_segments_tld_t* tld); - -#if MI_HUGE_PAGE_ABANDON -void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); -#else -void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); -#endif - -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page -void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); -void _mi_abandoned_await_readers(void); -void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld); - - - -// "page.c" -void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc; - -void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks -void _mi_page_unfull(mi_page_t* page); -void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page -void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread... 
-void _mi_heap_delayed_free_all(mi_heap_t* heap); -bool _mi_heap_delayed_free_partial(mi_heap_t* heap); -void _mi_heap_collect_retired(mi_heap_t* heap, bool force); - -void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never); -bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never); -size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append); -void _mi_deferred_free(mi_heap_t* heap, bool force); - -void _mi_page_free_collect(mi_page_t* page,bool force); -void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback from segments - -size_t _mi_bin_size(uint8_t bin); // for stats -uint8_t _mi_bin(size_t size); // for stats - -// "heap.c" -void _mi_heap_destroy_pages(mi_heap_t* heap); -void _mi_heap_collect_abandon(mi_heap_t* heap); -void _mi_heap_set_default_direct(mi_heap_t* heap); -bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid); -void _mi_heap_destroy_all(void); - -// "stats.c" -void _mi_stats_done(mi_stats_t* stats); - -mi_msecs_t _mi_clock_now(void); -mi_msecs_t _mi_clock_end(mi_msecs_t start); -mi_msecs_t _mi_clock_start(void); - -// "alloc.c" -void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic` -void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept; -void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` -void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept; -mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p); -bool _mi_free_delayed_block(mi_block_t* block); -void _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration - -#if MI_DEBUG>1 -bool _mi_page_is_valid(mi_page_t* page); -#endif - - -// ------------------------------------------------------ -// Branches -// ------------------------------------------------------ - -#if defined(__GNUC__) || defined(__clang__) -#define mi_unlikely(x) (__builtin_expect(!!(x),false)) -#define mi_likely(x) (__builtin_expect(!!(x),true)) -#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) -#define mi_unlikely(x) (x) [[unlikely]] -#define mi_likely(x) (x) [[likely]] -#else -#define mi_unlikely(x) (x) -#define mi_likely(x) (x) -#endif - -#ifndef __has_builtin -#define __has_builtin(x) 0 -#endif - - -/* ----------------------------------------------------------- - Error codes passed to `_mi_fatal_error` - All are recoverable but EFAULT is a serious error and aborts by default in secure mode. 
- For portability define undefined error codes using common Unix codes: - ------------------------------------------------------------ */ -#include -#ifndef EAGAIN // double free -#define EAGAIN (11) -#endif -#ifndef ENOMEM // out of memory -#define ENOMEM (12) -#endif -#ifndef EFAULT // corrupted free-list or meta-data -#define EFAULT (14) -#endif -#ifndef EINVAL // trying to free an invalid pointer -#define EINVAL (22) -#endif -#ifndef EOVERFLOW // count*size overflow -#define EOVERFLOW (75) -#endif - - -/* ----------------------------------------------------------- - Inlined definitions ------------------------------------------------------------ */ -#define MI_UNUSED(x) (void)(x) -#if (MI_DEBUG>0) -#define MI_UNUSED_RELEASE(x) -#else -#define MI_UNUSED_RELEASE(x) MI_UNUSED(x) -#endif - -#define MI_INIT4(x) x(),x(),x(),x() -#define MI_INIT8(x) MI_INIT4(x),MI_INIT4(x) -#define MI_INIT16(x) MI_INIT8(x),MI_INIT8(x) -#define MI_INIT32(x) MI_INIT16(x),MI_INIT16(x) -#define MI_INIT64(x) MI_INIT32(x),MI_INIT32(x) -#define MI_INIT128(x) MI_INIT64(x),MI_INIT64(x) -#define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x) - - -// Is `x` a power of two? (0 is considered a power of two) -static inline bool _mi_is_power_of_two(uintptr_t x) { - return ((x & (x - 1)) == 0); -} - -// Is a pointer aligned? -static inline bool _mi_is_aligned(void* p, size_t alignment) { - mi_assert_internal(alignment != 0); - return (((uintptr_t)p % alignment) == 0); -} - -// Align upwards -static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { - mi_assert_internal(alignment != 0); - uintptr_t mask = alignment - 1; - if ((alignment & mask) == 0) { // power of two? - return ((sz + mask) & ~mask); - } - else { - return (((sz + mask)/alignment)*alignment); - } -} - -// Align downwards -static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) { - mi_assert_internal(alignment != 0); - uintptr_t mask = alignment - 1; - if ((alignment & mask) == 0) { // power of two? - return (sz & ~mask); - } - else { - return ((sz / alignment) * alignment); - } -} - -// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`. -static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) { - mi_assert_internal(divider != 0); - return (divider == 0 ? size : ((size + divider - 1) / divider)); -} - -// Is memory zero initialized? -static inline bool mi_mem_is_zero(void* p, size_t size) { - for (size_t i = 0; i < size; i++) { - if (((uint8_t*)p)[i] != 0) return false; - } - return true; -} - - -// Align a byte size to a size in _machine words_, -// i.e. byte size == `wsize*sizeof(void*)`. 
-static inline size_t _mi_wsize_from_size(size_t size) { - mi_assert_internal(size <= SIZE_MAX - sizeof(uintptr_t)); - return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); -} - -// Overflow detecting multiply -#if __has_builtin(__builtin_umul_overflow) || (defined(__GNUC__) && (__GNUC__ >= 5)) -#include // UINT_MAX, ULONG_MAX -#if defined(_CLOCK_T) // for Illumos -#undef _CLOCK_T -#endif -static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { - #if (SIZE_MAX == ULONG_MAX) - return __builtin_umull_overflow(count, size, (unsigned long *)total); - #elif (SIZE_MAX == UINT_MAX) - return __builtin_umul_overflow(count, size, (unsigned int *)total); - #else - return __builtin_umulll_overflow(count, size, (unsigned long long *)total); - #endif -} -#else /* __builtin_umul_overflow is unavailable */ -static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { - #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) - *total = count * size; - // note: gcc/clang optimize this to directly check the overflow flag - return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) && size > 0 && (SIZE_MAX / size) < count); -} -#endif - -// Safe multiply `count*size` into `total`; return `true` on overflow. -static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* total) { - if (count==1) { // quick check for the case where count is one (common for C++ allocators) - *total = size; - return false; - } - else if mi_unlikely(mi_mul_overflow(count, size, total)) { - #if MI_DEBUG > 0 - _mi_error_message(EOVERFLOW, "allocation request is too large (%zu * %zu bytes)\n", count, size); - #endif - *total = SIZE_MAX; - return true; - } - else return false; -} - - -/* ---------------------------------------------------------------------------------------- -The thread local default heap: `_mi_get_default_heap` returns the thread local heap. -On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a -__thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures -that the storage will always be available (allocated on the thread stacks). -On some platforms though we cannot use that when overriding `malloc` since the underlying -TLS implementation (or the loader) will call itself `malloc` on a first access and recurse. -We try to circumvent this in an efficient way: -- macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the - loader itself calls `malloc` even before the modules are initialized. -- OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS). -- DragonFly: defaults are working but seem slow compared to freeBSD (see PR #323) -------------------------------------------------------------------------------------------- */ - -extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value of the thread local default heap -extern bool _mi_process_is_initialized; -mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap - -#if defined(MI_MALLOC_OVERRIDE) -#if defined(__APPLE__) // macOS -#define MI_TLS_SLOT 89 // seems unused? 
-// #define MI_TLS_RECURSE_GUARD 1 -// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) -// see -#elif defined(__OpenBSD__) -// use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16) -// see -#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24) -// #elif defined(__DragonFly__) -// #warning "mimalloc is not working correctly on DragonFly yet." -// #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) -#elif defined(__ANDROID__) -// See issue #381 -#define MI_TLS_PTHREAD -#endif -#endif - -#if defined(MI_TLS_SLOT) -static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept; // forward declaration -#elif defined(MI_TLS_PTHREAD_SLOT_OFS) -static inline mi_heap_t** mi_tls_pthread_heap_slot(void) { - pthread_t self = pthread_self(); - #if defined(__DragonFly__) - if (self==NULL) { - mi_heap_t* pheap_main = _mi_heap_main_get(); - return &pheap_main; - } - #endif - return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS); -} -#elif defined(MI_TLS_PTHREAD) -extern pthread_key_t _mi_heap_default_key; -#endif - -// Default heap to allocate from (if not using TLS- or pthread slots). -// Do not use this directly but use through `mi_heap_get_default()` (or the unchecked `mi_get_default_heap`). -// This thread local variable is only used when neither MI_TLS_SLOT, MI_TLS_PTHREAD, or MI_TLS_PTHREAD_SLOT_OFS are defined. -// However, on the Apple M1 we do use the address of this variable as the unique thread-id (issue #356). -extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from - -static inline mi_heap_t* mi_get_default_heap(void) { -#if defined(MI_TLS_SLOT) - mi_heap_t* heap = (mi_heap_t*)mi_tls_slot(MI_TLS_SLOT); - if mi_unlikely(heap == NULL) { - #ifdef __GNUC__ - __asm(""); // prevent conditional load of the address of _mi_heap_empty - #endif - heap = (mi_heap_t*)&_mi_heap_empty; - } - return heap; -#elif defined(MI_TLS_PTHREAD_SLOT_OFS) - mi_heap_t* heap = *mi_tls_pthread_heap_slot(); - return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); -#elif defined(MI_TLS_PTHREAD) - mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); - return (mi_unlikely(heap == NULL) ? 
(mi_heap_t*)&_mi_heap_empty : heap); -#else - #if defined(MI_TLS_RECURSE_GUARD) - if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get(); - #endif - return _mi_heap_default; -#endif -} - -static inline bool mi_heap_is_default(const mi_heap_t* heap) { - return (heap == mi_get_default_heap()); -} - -static inline bool mi_heap_is_backing(const mi_heap_t* heap) { - return (heap->tld->heap_backing == heap); -} - -static inline bool mi_heap_is_initialized(mi_heap_t* heap) { - mi_assert_internal(heap != NULL); - return (heap != &_mi_heap_empty); -} - -static inline uintptr_t _mi_ptr_cookie(const void* p) { - extern mi_heap_t _mi_heap_main; - mi_assert_internal(_mi_heap_main.cookie != 0); - return ((uintptr_t)p ^ _mi_heap_main.cookie); -} - -/* ----------------------------------------------------------- - Pages ------------------------------------------------------------ */ - -static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) { - mi_assert_internal(size <= (MI_SMALL_SIZE_MAX + MI_PADDING_SIZE)); - const size_t idx = _mi_wsize_from_size(size); - mi_assert_internal(idx < MI_PAGES_DIRECT); - return heap->pages_free_direct[idx]; -} - -// Get the page belonging to a certain size class -static inline mi_page_t* _mi_get_free_small_page(size_t size) { - return _mi_heap_get_free_small_page(mi_get_default_heap(), size); -} - -// Segment that contains the pointer -// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE), -// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it; -// therefore we align one byte before `p`. -static inline mi_segment_t* _mi_ptr_segment(const void* p) { - mi_assert_internal(p != NULL); - return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK); -} - -static inline mi_page_t* mi_slice_to_page(mi_slice_t* s) { - mi_assert_internal(s->slice_offset== 0 && s->slice_count > 0); - return (mi_page_t*)(s); -} - -static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) { - mi_assert_internal(p->slice_offset== 0 && p->slice_count > 0); - return (mi_slice_t*)(p); -} - -// Segment belonging to a page -static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { - mi_segment_t* segment = _mi_ptr_segment(page); - mi_assert_internal(segment == NULL || ((mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_entries)); - return segment; -} - -static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) { - mi_slice_t* start = (mi_slice_t*)((uint8_t*)slice - slice->slice_offset); - mi_assert_internal(start >= _mi_ptr_segment(slice)->slices); - mi_assert_internal(start->slice_offset == 0); - mi_assert_internal(start + start->slice_count > slice); - return start; -} - -// Get the page containing the pointer (performance critical as it is called in mi_free) -static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { - mi_assert_internal(p > (void*)segment); - ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; - mi_assert_internal(diff > 0 && diff <= (ptrdiff_t)MI_SEGMENT_SIZE); - size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT; - mi_assert_internal(idx <= segment->slice_entries); - mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; - mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data - mi_assert_internal(slice->slice_offset == 0); - mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_entries); - 
return mi_slice_to_page(slice); -} - -// Quick page start for initialized pages -static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - return _mi_segment_page_start(segment, page, page_size); -} - -// Get the page containing the pointer -static inline mi_page_t* _mi_ptr_page(void* p) { - return _mi_segment_page_of(_mi_ptr_segment(p), p); -} - -// Get the block size of a page (special case for huge objects) -static inline size_t mi_page_block_size(const mi_page_t* page) { - const size_t bsize = page->xblock_size; - mi_assert_internal(bsize > 0); - if mi_likely(bsize < MI_HUGE_BLOCK_SIZE) { - return bsize; - } - else { - size_t psize; - _mi_segment_page_start(_mi_page_segment(page), page, &psize); - return psize; - } -} - -static inline bool mi_page_is_huge(const mi_page_t* page) { - return (_mi_page_segment(page)->kind == MI_SEGMENT_HUGE); -} - -// Get the usable block size of a page without fixed padding. -// This may still include internal padding due to alignment and rounding up size classes. -static inline size_t mi_page_usable_block_size(const mi_page_t* page) { - return mi_page_block_size(page) - MI_PADDING_SIZE; -} - -// size of a segment -static inline size_t mi_segment_size(mi_segment_t* segment) { - return segment->segment_slices * MI_SEGMENT_SLICE_SIZE; -} - -static inline uint8_t* mi_segment_end(mi_segment_t* segment) { - return (uint8_t*)segment + mi_segment_size(segment); -} - -// Thread free access -static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { - return (mi_block_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & ~3); -} - -static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) { - return (mi_delayed_t)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & 3); -} - -// Heap access -static inline mi_heap_t* mi_page_heap(const mi_page_t* page) { - return (mi_heap_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xheap)); -} - -static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) { - mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING); - mi_atomic_store_release(&page->xheap,(uintptr_t)heap); -} - -// Thread free flag helpers -static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) { - return (mi_block_t*)(tf & ~0x03); -} -static inline mi_delayed_t mi_tf_delayed(mi_thread_free_t tf) { - return (mi_delayed_t)(tf & 0x03); -} -static inline mi_thread_free_t mi_tf_make(mi_block_t* block, mi_delayed_t delayed) { - return (mi_thread_free_t)((uintptr_t)block | (uintptr_t)delayed); -} -static inline mi_thread_free_t mi_tf_set_delayed(mi_thread_free_t tf, mi_delayed_t delayed) { - return mi_tf_make(mi_tf_block(tf),delayed); -} -static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* block) { - return mi_tf_make(block, mi_tf_delayed(tf)); -} - -// are all blocks in a page freed? -// note: needs up-to-date used count, (as the `xthread_free` list may not be empty). see `_mi_page_collect_free`. -static inline bool mi_page_all_free(const mi_page_t* page) { - mi_assert_internal(page != NULL); - return (page->used == 0); -} - -// are there any available blocks? -static inline bool mi_page_has_any_available(const mi_page_t* page) { - mi_assert_internal(page != NULL && page->reserved > 0); - return (page->used < page->reserved || (mi_page_thread_free(page) != NULL)); -} - -// are there immediately available blocks, i.e. blocks available on the free list. 
-static inline bool mi_page_immediate_available(const mi_page_t* page) { - mi_assert_internal(page != NULL); - return (page->free != NULL); -} - -// is more than 7/8th of a page in use? -static inline bool mi_page_mostly_used(const mi_page_t* page) { - if (page==NULL) return true; - uint16_t frac = page->reserved / 8U; - return (page->reserved - page->used <= frac); -} - -static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) { - return &((mi_heap_t*)heap)->pages[_mi_bin(size)]; -} - - - -//----------------------------------------------------------- -// Page flags -//----------------------------------------------------------- -static inline bool mi_page_is_in_full(const mi_page_t* page) { - return page->flags.x.in_full; -} - -static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) { - page->flags.x.in_full = in_full; -} - -static inline bool mi_page_has_aligned(const mi_page_t* page) { - return page->flags.x.has_aligned; -} - -static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) { - page->flags.x.has_aligned = has_aligned; -} - - -/* ------------------------------------------------------------------- -Encoding/Decoding the free list next pointers - -This is to protect against buffer overflow exploits where the -free list is mutated. Many hardened allocators xor the next pointer `p` -with a secret key `k1`, as `p^k1`. This prevents overwriting with known -values but might be still too weak: if the attacker can guess -the pointer `p` this can reveal `k1` (since `p^k1^p == k1`). -Moreover, if multiple blocks can be read as well, the attacker can -xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot -about the pointers (and subsequently `k1`). - -Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<> (MI_INTPTR_BITS - shift)))); -} -static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) { - shift %= MI_INTPTR_BITS; - return (shift==0 ? x : ((x >> shift) | (x << (MI_INTPTR_BITS - shift)))); -} - -static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) { - void* p = (void*)(mi_rotr(x - keys[0], keys[0]) ^ keys[1]); - return (p==null ? NULL : p); -} - -static inline mi_encoded_t mi_ptr_encode(const void* null, const void* p, const uintptr_t* keys) { - uintptr_t x = (uintptr_t)(p==NULL ? null : p); - return mi_rotl(x ^ keys[1], keys[0]) + keys[0]; -} - -static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, const uintptr_t* keys ) { - mi_track_mem_defined(block,sizeof(mi_block_t)); - mi_block_t* next; - #ifdef MI_ENCODE_FREELIST - next = (mi_block_t*)mi_ptr_decode(null, block->next, keys); - #else - MI_UNUSED(keys); MI_UNUSED(null); - next = (mi_block_t*)block->next; - #endif - mi_track_mem_noaccess(block,sizeof(mi_block_t)); - return next; -} - -static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) { - mi_track_mem_undefined(block,sizeof(mi_block_t)); - #ifdef MI_ENCODE_FREELIST - block->next = mi_ptr_encode(null, next, keys); - #else - MI_UNUSED(keys); MI_UNUSED(null); - block->next = (mi_encoded_t)next; - #endif - mi_track_mem_noaccess(block,sizeof(mi_block_t)); -} - -static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) { - #ifdef MI_ENCODE_FREELIST - mi_block_t* next = mi_block_nextx(page,block,page->keys); - // check for free list corruption: is `next` at least in the same page? 
- // TODO: check if `next` is `page->block_size` aligned? - if mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next)) { - _mi_error_message(EFAULT, "corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next); - next = NULL; - } - return next; - #else - MI_UNUSED(page); - return mi_block_nextx(page,block,NULL); - #endif -} - -static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) { - #ifdef MI_ENCODE_FREELIST - mi_block_set_nextx(page,block,next, page->keys); - #else - MI_UNUSED(page); - mi_block_set_nextx(page,block,next,NULL); - #endif -} - - -// ------------------------------------------------------------------- -// commit mask -// ------------------------------------------------------------------- - -static inline void mi_commit_mask_create_empty(mi_commit_mask_t* cm) { - for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { - cm->mask[i] = 0; - } -} - -static inline void mi_commit_mask_create_full(mi_commit_mask_t* cm) { - for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { - cm->mask[i] = ~((size_t)0); - } -} - -static inline bool mi_commit_mask_is_empty(const mi_commit_mask_t* cm) { - for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { - if (cm->mask[i] != 0) return false; - } - return true; -} - -static inline bool mi_commit_mask_is_full(const mi_commit_mask_t* cm) { - for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { - if (cm->mask[i] != ~((size_t)0)) return false; - } - return true; -} - -// defined in `segment.c`: -size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total); -size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx); - -#define mi_commit_mask_foreach(cm,idx,count) \ - idx = 0; \ - while ((count = _mi_commit_mask_next_run(cm,&idx)) > 0) { - -#define mi_commit_mask_foreach_end() \ - idx += count; \ - } - - - - -// ------------------------------------------------------------------- -// Fast "random" shuffle -// ------------------------------------------------------------------- - -static inline uintptr_t _mi_random_shuffle(uintptr_t x) { - if (x==0) { x = 17; } // ensure we don't get stuck in generating zeros -#if (MI_INTPTR_SIZE==8) - // by Sebastiano Vigna, see: - x ^= x >> 30; - x *= 0xbf58476d1ce4e5b9UL; - x ^= x >> 27; - x *= 0x94d049bb133111ebUL; - x ^= x >> 31; -#elif (MI_INTPTR_SIZE==4) - // by Chris Wellons, see: - x ^= x >> 16; - x *= 0x7feb352dUL; - x ^= x >> 15; - x *= 0x846ca68bUL; - x ^= x >> 16; -#endif - return x; -} - -// ------------------------------------------------------------------- -// Optimize numa node access for the common case (= one node) -// ------------------------------------------------------------------- - -int _mi_os_numa_node_get(mi_os_tld_t* tld); -size_t _mi_os_numa_node_count_get(void); - -extern _Atomic(size_t) _mi_numa_node_count; -static inline int _mi_os_numa_node(mi_os_tld_t* tld) { - if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) { return 0; } - else return _mi_os_numa_node_get(tld); -} -static inline size_t _mi_os_numa_node_count(void) { - const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count); - if mi_likely(count > 0) { return count; } - else return _mi_os_numa_node_count_get(); -} - - -// ------------------------------------------------------------------- -// Getting the thread id should be performant as it is called in the -// fast path of `_mi_free` and we specialize for various platforms. 
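The 64-bit branch of `_mi_random_shuffle` above is the xorshift-multiply mixer credited to Sebastiano Vigna (the splitmix64 finalizer). A standalone version with the same constants, usable to eyeball how quickly consecutive states decorrelate:

```
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

// 64-bit mix step with the same constants as the deleted _mi_random_shuffle.
static uint64_t shuffle64(uint64_t x) {
  if (x == 0) { x = 17; }   // avoid the fixed point at zero
  x ^= x >> 30; x *= 0xbf58476d1ce4e5b9ULL;
  x ^= x >> 27; x *= 0x94d049bb133111ebULL;
  x ^= x >> 31;
  return x;
}

int main(void) {
  uint64_t x = 42;
  for (int i = 0; i < 4; i++) {
    x = shuffle64(x);
    printf("%016" PRIx64 "\n", x);   // successive states look uncorrelated
  }
  return 0;
}
```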
-// We only require _mi_threadid() to return a unique id for each thread. -// ------------------------------------------------------------------- -#if defined(_WIN32) - -#define WIN32_LEAN_AND_MEAN -#include -static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { - // Windows: works on Intel and ARM in both 32- and 64-bit - return (uintptr_t)NtCurrentTeb(); -} - -// We use assembly for a fast thread id on the main platforms. The TLS layout depends on -// both the OS and libc implementation so we use specific tests for each main platform. -// If you test on another platform and it works please send a PR :-) -// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register. -#elif defined(__GNUC__) && ( \ - (defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ - || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \ - || (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ - || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ - || (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ - ) - -static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept { - void* res; - const size_t ofs = (slot*sizeof(void*)); - #if defined(__i386__) - __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86 32-bit always uses GS - #elif defined(__APPLE__) && defined(__x86_64__) - __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS - #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) - __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x32 ABI - #elif defined(__x86_64__) - __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS - #elif defined(__arm__) - void** tcb; MI_UNUSED(ofs); - __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); - res = tcb[slot]; - #elif defined(__aarch64__) - void** tcb; MI_UNUSED(ofs); - #if defined(__APPLE__) // M1, issue #343 - __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); - #else - __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); - #endif - res = tcb[slot]; - #endif - return res; -} - -// setting a tls slot is only used on macOS for now -static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { - const size_t ofs = (slot*sizeof(void*)); - #if defined(__i386__) - __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS - #elif defined(__APPLE__) && defined(__x86_64__) - __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 macOS uses GS - #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) - __asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x32 ABI - #elif defined(__x86_64__) - __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS - #elif defined(__arm__) - void** tcb; MI_UNUSED(ofs); - __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); - tcb[slot] = value; - #elif defined(__aarch64__) - void** tcb; MI_UNUSED(ofs); - #if defined(__APPLE__) // M1, issue #343 - __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); - #else - __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); - #endif - tcb[slot] = value; - #endif -} - -static inline mi_threadid_t _mi_thread_id(void) 
mi_attr_noexcept { - #if defined(__BIONIC__) - // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id - // see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86 - return (uintptr_t)mi_tls_slot(1); - #else - // in all our other targets, slot 0 is the thread id - // glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h - // apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36 - return (uintptr_t)mi_tls_slot(0); - #endif -} - -#else - -// otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms). -static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { - return (uintptr_t)&_mi_heap_default; -} - -#endif - - -// ----------------------------------------------------------------------- -// Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero) -// ----------------------------------------------------------------------- - -#if defined(__GNUC__) - -#include // LONG_MAX -#define MI_HAVE_FAST_BITSCAN -static inline size_t mi_clz(uintptr_t x) { - if (x==0) return MI_INTPTR_BITS; -#if (INTPTR_MAX == LONG_MAX) - return __builtin_clzl(x); -#else - return __builtin_clzll(x); -#endif -} -static inline size_t mi_ctz(uintptr_t x) { - if (x==0) return MI_INTPTR_BITS; -#if (INTPTR_MAX == LONG_MAX) - return __builtin_ctzl(x); -#else - return __builtin_ctzll(x); -#endif -} - -#elif defined(_MSC_VER) - -#include // LONG_MAX -#define MI_HAVE_FAST_BITSCAN -static inline size_t mi_clz(uintptr_t x) { - if (x==0) return MI_INTPTR_BITS; - unsigned long idx; -#if (INTPTR_MAX == LONG_MAX) - _BitScanReverse(&idx, x); -#else - _BitScanReverse64(&idx, x); -#endif - return ((MI_INTPTR_BITS - 1) - idx); -} -static inline size_t mi_ctz(uintptr_t x) { - if (x==0) return MI_INTPTR_BITS; - unsigned long idx; -#if (INTPTR_MAX == LONG_MAX) - _BitScanForward(&idx, x); -#else - _BitScanForward64(&idx, x); -#endif - return idx; -} - -#else -static inline size_t mi_ctz32(uint32_t x) { - // de Bruijn multiplication, see - static const unsigned char debruijn[32] = { - 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, - 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 - }; - if (x==0) return 32; - return debruijn[((x & -(int32_t)x) * 0x077CB531UL) >> 27]; -} -static inline size_t mi_clz32(uint32_t x) { - // de Bruijn multiplication, see - static const uint8_t debruijn[32] = { - 31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1, - 23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0 - }; - if (x==0) return 32; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - return debruijn[(uint32_t)(x * 0x07C4ACDDUL) >> 27]; -} - -static inline size_t mi_clz(uintptr_t x) { - if (x==0) return MI_INTPTR_BITS; -#if (MI_INTPTR_BITS <= 32) - return mi_clz32((uint32_t)x); -#else - size_t count = mi_clz32((uint32_t)(x >> 32)); - if (count < 32) return count; - return (32 + mi_clz32((uint32_t)x)); -#endif -} -static inline size_t mi_ctz(uintptr_t x) { - if (x==0) return MI_INTPTR_BITS; -#if (MI_INTPTR_BITS <= 32) - return mi_ctz32((uint32_t)x); -#else - size_t count = mi_ctz32((uint32_t)x); - if (count < 32) return count; - return (32 + mi_ctz32((uint32_t)(x>>32))); -#endif -} - -#endif - -// "bit scan reverse": Return index of the highest bit (or MI_INTPTR_BITS if `x` is zero) -static inline size_t mi_bsr(uintptr_t x) { - return (x==0 ? 
MI_INTPTR_BITS : MI_INTPTR_BITS - 1 - mi_clz(x)); -} - - -// --------------------------------------------------------------------------------- -// Provide our own `_mi_memcpy` for potential performance optimizations. -// -// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if -// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support -// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253. -// --------------------------------------------------------------------------------- - -#if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) -#include -#include -extern bool _mi_cpu_has_fsrm; -static inline void _mi_memcpy(void* dst, const void* src, size_t n) { - if (_mi_cpu_has_fsrm) { - __movsb((unsigned char*)dst, (const unsigned char*)src, n); - } - else { - memcpy(dst, src, n); - } -} -static inline void _mi_memzero(void* dst, size_t n) { - if (_mi_cpu_has_fsrm) { - __stosb((unsigned char*)dst, 0, n); - } - else { - memset(dst, 0, n); - } -} -#else -#include -static inline void _mi_memcpy(void* dst, const void* src, size_t n) { - memcpy(dst, src, n); -} -static inline void _mi_memzero(void* dst, size_t n) { - memset(dst, 0, n); -} -#endif - - -// ------------------------------------------------------------------------------- -// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned -// This is used for example in `mi_realloc`. -// ------------------------------------------------------------------------------- - -#if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) -// On GCC/CLang we provide a hint that the pointers are word aligned. -#include -static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { - mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); - void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); - const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE); - _mi_memcpy(adst, asrc, n); -} - -static inline void _mi_memzero_aligned(void* dst, size_t n) { - mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0); - void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); - _mi_memzero(adst, n); -} -#else -// Default fallback on `_mi_memcpy` -static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { - mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); - _mi_memcpy(dst, src, n); -} - -static inline void _mi_memzero_aligned(void* dst, size_t n) { - mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0); - _mi_memzero(dst, n); -} -#endif - - -#endif diff --git a/3rd/mimalloc-2.0.9/include/mimalloc-track.h b/3rd/mimalloc-2.0.9/include/mimalloc-track.h deleted file mode 100644 index f60d7acd..00000000 --- a/3rd/mimalloc-2.0.9/include/mimalloc-track.h +++ /dev/null @@ -1,62 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. 
------------------------------------------------------------------------------*/ -#pragma once -#ifndef MIMALLOC_TRACK_H -#define MIMALLOC_TRACK_H - -// ------------------------------------------------------ -// Track memory ranges with macros for tools like Valgrind -// address sanitizer, or other memory checkers. -// ------------------------------------------------------ - -#if MI_VALGRIND - -#define MI_TRACK_ENABLED 1 -#define MI_TRACK_TOOL "valgrind" - -#include -#include - -#define mi_track_malloc(p,size,zero) VALGRIND_MALLOCLIKE_BLOCK(p,size,MI_PADDING_SIZE /*red zone*/,zero) -#define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/) -#define mi_track_free(p) VALGRIND_FREELIKE_BLOCK(p,MI_PADDING_SIZE /*red zone*/) -#define mi_track_free_size(p,_size) mi_track_free(p) -#define mi_track_mem_defined(p,size) VALGRIND_MAKE_MEM_DEFINED(p,size) -#define mi_track_mem_undefined(p,size) VALGRIND_MAKE_MEM_UNDEFINED(p,size) -#define mi_track_mem_noaccess(p,size) VALGRIND_MAKE_MEM_NOACCESS(p,size) - -#elif MI_ASAN - -#define MI_TRACK_ENABLED 1 -#define MI_TRACK_TOOL "asan" - -#include - -#define mi_track_malloc(p,size,zero) ASAN_UNPOISON_MEMORY_REGION(p,size) -#define mi_track_resize(p,oldsize,newsize) ASAN_POISON_MEMORY_REGION(p,oldsize); ASAN_UNPOISON_MEMORY_REGION(p,newsize) -#define mi_track_free(p) ASAN_POISON_MEMORY_REGION(p,mi_usable_size(p)) -#define mi_track_free_size(p,size) ASAN_POISON_MEMORY_REGION(p,size) -#define mi_track_mem_defined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size) -#define mi_track_mem_undefined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size) -#define mi_track_mem_noaccess(p,size) ASAN_POISON_MEMORY_REGION(p,size) - -#else - -#define MI_TRACK_ENABLED 0 -#define MI_TRACK_TOOL "none" - -#define mi_track_malloc(p,size,zero) -#define mi_track_resize(p,oldsize,newsize) -#define mi_track_free(p) -#define mi_track_free_size(p,_size) -#define mi_track_mem_defined(p,size) -#define mi_track_mem_undefined(p,size) -#define mi_track_mem_noaccess(p,size) - -#endif - -#endif diff --git a/3rd/mimalloc-2.0.9/include/mimalloc-types.h b/3rd/mimalloc-2.0.9/include/mimalloc-types.h deleted file mode 100644 index f3af528e..00000000 --- a/3rd/mimalloc-2.0.9/include/mimalloc-types.h +++ /dev/null @@ -1,609 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ -#pragma once -#ifndef MIMALLOC_TYPES_H -#define MIMALLOC_TYPES_H - -#include // ptrdiff_t -#include // uintptr_t, uint16_t, etc -#include "mimalloc-atomic.h" // _Atomic - -#ifdef _MSC_VER -#pragma warning(disable:4214) // bitfield is not int -#endif - -// Minimal alignment necessary. On most platforms 16 bytes are needed -// due to SSE registers for example. This must be at least `sizeof(void*)` -#ifndef MI_MAX_ALIGN_SIZE -#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t) -#endif - -// ------------------------------------------------------ -// Variants -// ------------------------------------------------------ - -// Define NDEBUG in the release version to disable assertions. 
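The deleted `mimalloc-track.h` shows the pattern of routing a fixed set of range-tracking macros to Valgrind client requests, ASAN poisoning, or no-ops. A reduced sketch of the same pattern with only a poison/unpoison pair; `DEMO_USE_ASAN` and the `demo_mem_*` names are placeholders, not mimalloc macros:

```
#include <stdio.h>
#include <stdlib.h>

// Route range-tracking macros to a memory checker when one is enabled,
// and compile them away otherwise (same idea as mimalloc-track.h).
#if defined(DEMO_USE_ASAN)
  #include <sanitizer/asan_interface.h>
  #define demo_mem_noaccess(p,n)  ASAN_POISON_MEMORY_REGION(p,n)
  #define demo_mem_defined(p,n)   ASAN_UNPOISON_MEMORY_REGION(p,n)
#else
  #define demo_mem_noaccess(p,n)  ((void)(p),(void)(n))
  #define demo_mem_defined(p,n)   ((void)(p),(void)(n))
#endif

int main(void) {
  char* buf = malloc(64);
  if (buf == NULL) return 1;
  demo_mem_noaccess(buf, 64);     // hide the block from the program
  demo_mem_defined(buf, 64);      // re-expose it before use
  buf[0] = 'x';
  printf("%c\n", buf[0]);
  free(buf);
  return 0;
}
```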
-// #define NDEBUG - -// Define MI_VALGRIND to enable valgrind support -// #define MI_VALGRIND 1 - -// Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance). -// #define MI_STAT 1 - -// Define MI_SECURE to enable security mitigations -// #define MI_SECURE 1 // guard page around metadata -// #define MI_SECURE 2 // guard page around each mimalloc page -// #define MI_SECURE 3 // encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free) -// #define MI_SECURE 4 // checks for double free. (may be more expensive) - -#if !defined(MI_SECURE) -#define MI_SECURE 0 -#endif - -// Define MI_DEBUG for debug mode -// #define MI_DEBUG 1 // basic assertion checks and statistics, check double free, corrupted free list, and invalid pointer free. -// #define MI_DEBUG 2 // + internal assertion checks -// #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_DEBUG_FULL=ON) -#if !defined(MI_DEBUG) -#if !defined(NDEBUG) || defined(_DEBUG) -#define MI_DEBUG 2 -#else -#define MI_DEBUG 0 -#endif -#endif - -// Reserve extra padding at the end of each block to be more resilient against heap block overflows. -// The padding can detect byte-precise buffer overflow on free. -#if !defined(MI_PADDING) && (MI_DEBUG>=1 || MI_VALGRIND) -#define MI_PADDING 1 -#endif - - -// Encoded free lists allow detection of corrupted free lists -// and can detect buffer overflows, modify after free, and double `free`s. -#if (MI_SECURE>=3 || MI_DEBUG>=1) -#define MI_ENCODE_FREELIST 1 -#endif - - -// We used to abandon huge pages but to eagerly deallocate if freed from another thread, -// but that makes it not possible to visit them during a heap walk or include them in a -// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks if freed from -// another thread so most memory is available until it gets properly freed by the owning thread. -// #define MI_HUGE_PAGE_ABANDON 1 - - -// ------------------------------------------------------ -// Platform specific values -// ------------------------------------------------------ - -// ------------------------------------------------------ -// Size of a pointer. -// We assume that `sizeof(void*)==sizeof(intptr_t)` -// and it holds for all platforms we know of. -// -// However, the C standard only requires that: -// p == (void*)((intptr_t)p)) -// but we also need: -// i == (intptr_t)((void*)i) -// or otherwise one might define an intptr_t type that is larger than a pointer... 
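The pointer-size assumption spelled out above (`sizeof(void*) == sizeof(intptr_t)`, with integer/pointer casts that round-trip) can be pinned down at compile time. A small sketch using standard `_Static_assert`; the round-trip property itself is a platform guarantee that cannot be checked statically:

```
#include <stdint.h>

// Compile-time checks for the layout assumption described above.
_Static_assert(sizeof(void*) == sizeof(intptr_t),  "void* and intptr_t must have the same size");
_Static_assert(sizeof(void*) == sizeof(uintptr_t), "void* and uintptr_t must have the same size");

int main(void) { return 0; }
```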
-// ------------------------------------------------------ - -#if INTPTR_MAX > INT64_MAX -# define MI_INTPTR_SHIFT (4) // assume 128-bit (as on arm CHERI for example) -#elif INTPTR_MAX == INT64_MAX -# define MI_INTPTR_SHIFT (3) -#elif INTPTR_MAX == INT32_MAX -# define MI_INTPTR_SHIFT (2) -#else -#error platform pointers must be 32, 64, or 128 bits -#endif - -#if SIZE_MAX == UINT64_MAX -# define MI_SIZE_SHIFT (3) -typedef int64_t mi_ssize_t; -#elif SIZE_MAX == UINT32_MAX -# define MI_SIZE_SHIFT (2) -typedef int32_t mi_ssize_t; -#else -#error platform objects must be 32 or 64 bits -#endif - -#if (SIZE_MAX/2) > LONG_MAX -# define MI_ZU(x) x##ULL -# define MI_ZI(x) x##LL -#else -# define MI_ZU(x) x##UL -# define MI_ZI(x) x##L -#endif - -#define MI_INTPTR_SIZE (1< 4 -#define MI_SEGMENT_SHIFT ( 9 + MI_SEGMENT_SLICE_SHIFT) // 32MiB -#else -#define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 4MiB on 32-bit -#endif - -#define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64KiB -#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB - - -// Derived constants -#define MI_SEGMENT_SIZE (MI_ZU(1)<= 655360) -#error "mimalloc internal: define more bins" -#endif - -// Maximum slice offset (15) -#define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1) - -// Used as a special value to encode block sizes in 32 bits. -#define MI_HUGE_BLOCK_SIZE ((uint32_t)(2*MI_GiB)) - -// blocks up to this size are always allocated aligned -#define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE) - -// Alignments over MI_ALIGNMENT_MAX are allocated in dedicated huge page segments -#define MI_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) - - -// ------------------------------------------------------ -// Mimalloc pages contain allocated blocks -// ------------------------------------------------------ - -// The free lists use encoded next fields -// (Only actually encodes when MI_ENCODED_FREELIST is defined.) -typedef uintptr_t mi_encoded_t; - -// thread id's -typedef size_t mi_threadid_t; - -// free lists contain blocks -typedef struct mi_block_s { - mi_encoded_t next; -} mi_block_t; - - -// The delayed flags are used for efficient multi-threaded free-ing -typedef enum mi_delayed_e { - MI_USE_DELAYED_FREE = 0, // push on the owning heap thread delayed list - MI_DELAYED_FREEING = 1, // temporary: another thread is accessing the owning heap - MI_NO_DELAYED_FREE = 2, // optimize: push on page local thread free queue if another block is already in the heap thread delayed free list - MI_NEVER_DELAYED_FREE = 3 // sticky, only resets on page reclaim -} mi_delayed_t; - - -// The `in_full` and `has_aligned` page flags are put in a union to efficiently -// test if both are false (`full_aligned == 0`) in the `mi_free` routine. -#if !MI_TSAN -typedef union mi_page_flags_s { - uint8_t full_aligned; - struct { - uint8_t in_full : 1; - uint8_t has_aligned : 1; - } x; -} mi_page_flags_t; -#else -// under thread sanitizer, use a byte for each flag to suppress warning, issue #130 -typedef union mi_page_flags_s { - uint16_t full_aligned; - struct { - uint8_t in_full; - uint8_t has_aligned; - } x; -} mi_page_flags_t; -#endif - -// Thread free list. -// We use the bottom 2 bits of the pointer for mi_delayed_t flags -typedef uintptr_t mi_thread_free_t; - -// A page contains blocks of one specific size (`block_size`). 
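`mi_page_flags_t` above overlays the `in_full` and `has_aligned` bits on a single byte so the free path can test "neither flag set" with one compare. A standalone sketch of that union trick (`demo_flags_t` is an illustrative name):

```
#include <assert.h>
#include <stdint.h>

// Two 1-bit flags overlaid on one byte, so "neither flag set" is a single
// compare of .both against 0 -- the same trick as the deleted mi_page_flags_t.
typedef union demo_flags_u {
  uint8_t both;
  struct {
    uint8_t in_full     : 1;
    uint8_t has_aligned : 1;
  } x;
} demo_flags_t;

int main(void) {
  demo_flags_t f = { 0 };
  assert(f.both == 0);            // fast path: neither flag set
  f.x.has_aligned = 1;
  assert(f.both != 0);            // slow path taken as soon as either bit is set
  return 0;
}
```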
-// Each page has three list of free blocks: -// `free` for blocks that can be allocated, -// `local_free` for freed blocks that are not yet available to `mi_malloc` -// `thread_free` for freed blocks by other threads -// The `local_free` and `thread_free` lists are migrated to the `free` list -// when it is exhausted. The separate `local_free` list is necessary to -// implement a monotonic heartbeat. The `thread_free` list is needed for -// avoiding atomic operations in the common case. -// -// -// `used - |thread_free|` == actual blocks that are in use (alive) -// `used - |thread_free| + |free| + |local_free| == capacity` -// -// We don't count `freed` (as |free|) but use `used` to reduce -// the number of memory accesses in the `mi_page_all_free` function(s). -// -// Notes: -// - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`) -// - Using `uint16_t` does not seem to slow things down -// - The size is 8 words on 64-bit which helps the page index calculations -// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10 -// and 12 are still good for address calculation) -// - To limit the structure size, the `xblock_size` is 32-bits only; for -// blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size -// - `thread_free` uses the bottom bits as a delayed-free flags to optimize -// concurrent frees where only the first concurrent free adds to the owning -// heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`). -// The invariant is that no-delayed-free is only set if there is -// at least one block that will be added, or as already been added, to -// the owning heap `thread_delayed_free` list. This guarantees that pages -// will be freed correctly even if only other threads free blocks. 
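The two bookkeeping identities in the comment above can be restated as a single predicate over plain counts. A sketch, purely as a checking aid and not mimalloc code (`page_counts_consistent` is a hypothetical helper):

```
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

// Restates the identities from the comment above:
//   alive == used - |thread_free|
//   alive + |free| + |local_free| == capacity
static bool page_counts_consistent(size_t capacity, size_t used,
                                   size_t free_len, size_t local_free_len,
                                   size_t thread_free_len) {
  size_t alive = used - thread_free_len;              // blocks still in use
  return alive + free_len + local_free_len == capacity;
}

int main(void) {
  // Example: 16 reserved blocks, used = 10 (2 of them on thread_free),
  // 7 on the free list and 1 on local_free: 8 + 7 + 1 == 16.
  assert(page_counts_consistent(/*capacity*/16, /*used*/10,
                                /*free*/7, /*local_free*/1, /*thread_free*/2));
  return 0;
}
```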
-typedef struct mi_page_s { - // "owned" by the segment - uint32_t slice_count; // slices in this page (0 if not a page) - uint32_t slice_offset; // distance from the actual page data slice (0 if a page) - uint8_t is_reset : 1; // `true` if the page memory was reset - uint8_t is_committed : 1; // `true` if the page virtual memory is committed - uint8_t is_zero_init : 1; // `true` if the page was zero initialized - - // layout like this to optimize access in `mi_malloc` and `mi_free` - uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` - uint16_t reserved; // number of blocks reserved in memory - mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) - uint8_t is_zero : 1; // `true` if the blocks in the free list are zero initialized - uint8_t retire_expire : 7; // expiration count for retired blocks - - mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) - uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) - uint32_t xblock_size; // size available in each block (always `>0`) - mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) - - #ifdef MI_ENCODE_FREELIST - uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) - #endif - - _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads - _Atomic(uintptr_t) xheap; - - struct mi_page_s* next; // next page owned by this thread with the same `block_size` - struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` - - // 64-bit 9 words, 32-bit 12 words, (+2 for secure) - #if MI_INTPTR_SIZE==8 - uintptr_t padding[1]; - #endif -} mi_page_t; - - - -typedef enum mi_page_kind_e { - MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment - MI_PAGE_MEDIUM, // medium blocks go into medium pages inside a segment - MI_PAGE_LARGE, // larger blocks go into a page of just one block - MI_PAGE_HUGE, // huge blocks (> 16 MiB) are put into a single page in a single segment. -} mi_page_kind_t; - -typedef enum mi_segment_kind_e { - MI_SEGMENT_NORMAL, // MI_SEGMENT_SIZE size with pages inside. - MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just one huge page inside. -} mi_segment_kind_t; - -// ------------------------------------------------------ -// A segment holds a commit mask where a bit is set if -// the corresponding MI_COMMIT_SIZE area is committed. -// The MI_COMMIT_SIZE must be a multiple of the slice -// size. If it is equal we have the most fine grained -// decommit (but setting it higher can be more efficient). 
-// The MI_MINIMAL_COMMIT_SIZE is the minimal amount that will -// be committed in one go which can be set higher than -// MI_COMMIT_SIZE for efficiency (while the decommit mask -// is still tracked in fine-grained MI_COMMIT_SIZE chunks) -// ------------------------------------------------------ - -#define MI_MINIMAL_COMMIT_SIZE (16*MI_SEGMENT_SLICE_SIZE) // 1MiB -#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB -#define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) -#define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS -#define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) - -#if (MI_COMMIT_MASK_BITS != (MI_COMMIT_MASK_FIELD_COUNT * MI_COMMIT_MASK_FIELD_BITS)) -#error "the segment size must be exactly divisible by the (commit size * size_t bits)" -#endif - -typedef struct mi_commit_mask_s { - size_t mask[MI_COMMIT_MASK_FIELD_COUNT]; -} mi_commit_mask_t; - -typedef mi_page_t mi_slice_t; -typedef int64_t mi_msecs_t; - - -// Segments are large allocated memory blocks (8mb on 64 bit) from -// the OS. Inside segments we allocated fixed size _pages_ that -// contain blocks. -typedef struct mi_segment_s { - size_t memid; // memory id for arena allocation - bool mem_is_pinned; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) - bool mem_is_large; // in large/huge os pages? - bool mem_is_committed; // `true` if the whole segment is eagerly committed - size_t mem_alignment; // page alignment for huge pages (only used for alignment > MI_ALIGNMENT_MAX) - size_t mem_align_offset; // offset for huge page alignment (only used for alignment > MI_ALIGNMENT_MAX) - - bool allow_decommit; - mi_msecs_t decommit_expire; - mi_commit_mask_t decommit_mask; - mi_commit_mask_t commit_mask; - - _Atomic(struct mi_segment_s*) abandoned_next; - - // from here is zero initialized - struct mi_segment_s* next; // the list of freed segments in the cache (must be first field, see `segment.c:mi_segment_init`) - - size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) - size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim it it is too long) - size_t used; // count of pages in use - uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` - - size_t segment_slices; // for huge segments this may be different from `MI_SLICES_PER_SEGMENT` - size_t segment_info_slices; // initial slices we are using segment info and possible guard pages. - - // layout like this to optimize access in `mi_free` - mi_segment_kind_t kind; - size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT` - _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment - - mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment -} mi_segment_t; - - -// ------------------------------------------------------ -// Heaps -// Provide first-class heaps to allocate from. -// A heap just owns a set of pages for allocation and -// can only be allocate/reallocate from the thread that created it. -// Freeing blocks can be done from any thread though. -// Per thread, the segments are shared among its heaps. -// Per thread, there is always a default heap that is -// used for allocation; it is initialized to statically -// point to an empty heap to avoid initialization checks -// in the fast path. 
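With the 64-bit defaults visible in these headers (64 KiB slices, hence a 64 KiB `MI_COMMIT_SIZE`, and a 32 MiB segment), the commit mask works out to 512 bits, i.e. 8 `size_t` fields. A compile-time sketch of that arithmetic with the constants restated as demo macros:

```
#include <stddef.h>

// Commit-mask geometry for the 64-bit defaults shown above:
// 64 KiB commit granularity over a 32 MiB segment -> 512 bits -> 8 size_t words.
#define DEMO_COMMIT_SIZE   ((size_t)1 << 16)            // 64 KiB (one slice)
#define DEMO_SEGMENT_SIZE  ((size_t)1 << 25)            // 32 MiB
#define DEMO_MASK_BITS     (DEMO_SEGMENT_SIZE / DEMO_COMMIT_SIZE)
#define DEMO_FIELD_BITS    (8 * sizeof(size_t))
#define DEMO_FIELD_COUNT   (DEMO_MASK_BITS / DEMO_FIELD_BITS)

_Static_assert(DEMO_MASK_BITS == 512, "512 commit chunks per segment");
_Static_assert(DEMO_MASK_BITS % DEMO_FIELD_BITS == 0, "mask must divide evenly into size_t fields");

int main(void) { return (int)DEMO_FIELD_COUNT; /* 8 on a 64-bit target */ }
```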
-// ------------------------------------------------------ - -// Thread local data -typedef struct mi_tld_s mi_tld_t; - -// Pages of a certain block size are held in a queue. -typedef struct mi_page_queue_s { - mi_page_t* first; - mi_page_t* last; - size_t block_size; -} mi_page_queue_t; - -#define MI_BIN_FULL (MI_BIN_HUGE+1) - -// Random context -typedef struct mi_random_cxt_s { - uint32_t input[16]; - uint32_t output[16]; - int output_available; - bool weak; -} mi_random_ctx_t; - - -// In debug mode there is a padding structure at the end of the blocks to check for buffer overflows -#if (MI_PADDING) -typedef struct mi_padding_s { - uint32_t canary; // encoded block value to check validity of the padding (in case of overflow) - uint32_t delta; // padding bytes before the block. (mi_usable_size(p) - delta == exact allocated bytes) -} mi_padding_t; -#define MI_PADDING_SIZE (sizeof(mi_padding_t)) -#define MI_PADDING_WSIZE ((MI_PADDING_SIZE + MI_INTPTR_SIZE - 1) / MI_INTPTR_SIZE) -#else -#define MI_PADDING_SIZE 0 -#define MI_PADDING_WSIZE 0 -#endif - -#define MI_PAGES_DIRECT (MI_SMALL_WSIZE_MAX + MI_PADDING_WSIZE + 1) - - -// A heap owns a set of pages. -struct mi_heap_s { - mi_tld_t* tld; - mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size. - mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") - _Atomic(mi_block_t*) thread_delayed_free; - mi_threadid_t thread_id; // thread this heap belongs too - mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0) - uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) - uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list - mi_random_ctx_t random; // random number context used for secure allocation - size_t page_count; // total number of pages in the `pages` queues. - size_t page_retired_min; // smallest retired index (retired pages are fully free, but still in the page queues) - size_t page_retired_max; // largest retired index into the `pages` array. - mi_heap_t* next; // list of heaps per thread - bool no_reclaim; // `true` if this heap should not reclaim abandoned pages -}; - - - -// ------------------------------------------------------ -// Debug -// ------------------------------------------------------ - -#if !defined(MI_DEBUG_UNINIT) -#define MI_DEBUG_UNINIT (0xD0) -#endif -#if !defined(MI_DEBUG_FREED) -#define MI_DEBUG_FREED (0xDF) -#endif -#if !defined(MI_DEBUG_PADDING) -#define MI_DEBUG_PADDING (0xDE) -#endif - -#if (MI_DEBUG) -// use our own assertion to print without memory allocation -void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func ); -#define mi_assert(expr) ((expr) ? 
(void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__)) -#else -#define mi_assert(x) -#endif - -#if (MI_DEBUG>1) -#define mi_assert_internal mi_assert -#else -#define mi_assert_internal(x) -#endif - -#if (MI_DEBUG>2) -#define mi_assert_expensive mi_assert -#else -#define mi_assert_expensive(x) -#endif - -// ------------------------------------------------------ -// Statistics -// ------------------------------------------------------ - -#ifndef MI_STAT -#if (MI_DEBUG>0) -#define MI_STAT 2 -#else -#define MI_STAT 0 -#endif -#endif - -typedef struct mi_stat_count_s { - int64_t allocated; - int64_t freed; - int64_t peak; - int64_t current; -} mi_stat_count_t; - -typedef struct mi_stat_counter_s { - int64_t total; - int64_t count; -} mi_stat_counter_t; - -typedef struct mi_stats_s { - mi_stat_count_t segments; - mi_stat_count_t pages; - mi_stat_count_t reserved; - mi_stat_count_t committed; - mi_stat_count_t reset; - mi_stat_count_t page_committed; - mi_stat_count_t segments_abandoned; - mi_stat_count_t pages_abandoned; - mi_stat_count_t threads; - mi_stat_count_t normal; - mi_stat_count_t huge; - mi_stat_count_t large; - mi_stat_count_t malloc; - mi_stat_count_t segments_cache; - mi_stat_counter_t pages_extended; - mi_stat_counter_t mmap_calls; - mi_stat_counter_t commit_calls; - mi_stat_counter_t page_no_retire; - mi_stat_counter_t searches; - mi_stat_counter_t normal_count; - mi_stat_counter_t huge_count; - mi_stat_counter_t large_count; -#if MI_STAT>1 - mi_stat_count_t normal_bins[MI_BIN_HUGE+1]; -#endif -} mi_stats_t; - - -void _mi_stat_increase(mi_stat_count_t* stat, size_t amount); -void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount); -void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); - -#if (MI_STAT) -#define mi_stat_increase(stat,amount) _mi_stat_increase( &(stat), amount) -#define mi_stat_decrease(stat,amount) _mi_stat_decrease( &(stat), amount) -#define mi_stat_counter_increase(stat,amount) _mi_stat_counter_increase( &(stat), amount) -#else -#define mi_stat_increase(stat,amount) (void)0 -#define mi_stat_decrease(stat,amount) (void)0 -#define mi_stat_counter_increase(stat,amount) (void)0 -#endif - -#define mi_heap_stat_counter_increase(heap,stat,amount) mi_stat_counter_increase( (heap)->tld->stats.stat, amount) -#define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount) -#define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) - -// ------------------------------------------------------ -// Thread Local data -// ------------------------------------------------------ - -// A "span" is is an available range of slices. The span queues keep -// track of slice spans of at most the given `slice_count` (but more than the previous size class). 
-typedef struct mi_span_queue_s { - mi_slice_t* first; - mi_slice_t* last; - size_t slice_count; -} mi_span_queue_t; - -#define MI_SEGMENT_BIN_MAX (35) // 35 == mi_segment_bin(MI_SLICES_PER_SEGMENT) - -// OS thread local data -typedef struct mi_os_tld_s { - size_t region_idx; // start point for next allocation - mi_stats_t* stats; // points to tld stats -} mi_os_tld_t; - - -// Segments thread local data -typedef struct mi_segments_tld_s { - mi_span_queue_t spans[MI_SEGMENT_BIN_MAX+1]; // free slice spans inside segments - size_t count; // current number of segments; - size_t peak_count; // peak number of segments - size_t current_size; // current size of all segments - size_t peak_size; // peak size of all segments - mi_stats_t* stats; // points to tld stats - mi_os_tld_t* os; // points to os stats -} mi_segments_tld_t; - -// Thread local data -struct mi_tld_s { - unsigned long long heartbeat; // monotonic heartbeat count - bool recurse; // true if deferred was called; used to prevent infinite recursion. - mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted) - mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates) - mi_segments_tld_t segments; // segment tld - mi_os_tld_t os; // os tld - mi_stats_t stats; // statistics -}; - -#endif diff --git a/3rd/mimalloc-2.0.9/src/alloc-aligned.c b/3rd/mimalloc-2.0.9/src/alloc-aligned.c deleted file mode 100644 index 9fe82890..00000000 --- a/3rd/mimalloc-2.0.9/src/alloc-aligned.c +++ /dev/null @@ -1,306 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. 
------------------------------------------------------------------------------*/ - -#include "mimalloc.h" -#include "mimalloc-internal.h" - -#include // memset - -// ------------------------------------------------------ -// Aligned Allocation -// ------------------------------------------------------ - -// Fallback primitive aligned allocation -- split out for better codegen -static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept -{ - mi_assert_internal(size <= PTRDIFF_MAX); - mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment)); - - const uintptr_t align_mask = alignment - 1; // for any x, `(x & align_mask) == (x % alignment)` - const size_t padsize = size + MI_PADDING_SIZE; - - // use regular allocation if it is guaranteed to fit the alignment constraints - if (offset==0 && alignment<=padsize && padsize<=MI_MAX_ALIGN_GUARANTEE && (padsize&align_mask)==0) { - void* p = _mi_heap_malloc_zero(heap, size, zero); - mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0); - return p; - } - - void* p; - size_t oversize; - if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { - // use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page) - // This can support alignments >= MI_SEGMENT_SIZE by ensuring the object can be aligned at a point in the - // first (and single) page such that the segment info is `MI_SEGMENT_SIZE` bytes before it (so it can be found by aligning the pointer down) - if mi_unlikely(offset != 0) { - // todo: cannot support offset alignment for very large alignments yet - #if MI_DEBUG > 0 - _mi_error_message(EOVERFLOW, "aligned allocation with a very large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset); - #endif - return NULL; - } - oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size); - p = _mi_heap_malloc_zero_ex(heap, oversize, false, alignment); // the page block size should be large enough to align in the single huge page block - // zero afterwards as only the area from the aligned_p may be committed! - if (p == NULL) return NULL; - } - else { - // otherwise over-allocate - oversize = size + alignment - 1; - p = _mi_heap_malloc_zero(heap, oversize, zero); - if (p == NULL) return NULL; - } - - // .. and align within the allocation - const uintptr_t poffset = ((uintptr_t)p + offset) & align_mask; - const uintptr_t adjust = (poffset == 0 ? 
0 : alignment - poffset); - mi_assert_internal(adjust < alignment); - void* aligned_p = (void*)((uintptr_t)p + adjust); - if (aligned_p != p) { - mi_page_set_has_aligned(_mi_ptr_page(p), true); - } - - mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size); - mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); - mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); - mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size); - - // now zero the block if needed - if (zero && alignment > MI_ALIGNMENT_MAX) { - const ptrdiff_t diff = (uint8_t*)aligned_p - (uint8_t*)p; - const ptrdiff_t zsize = mi_page_usable_block_size(_mi_ptr_page(p)) - diff - MI_PADDING_SIZE; - if (zsize > 0) { _mi_memzero(aligned_p, zsize); } - } - - #if MI_TRACK_ENABLED - if (p != aligned_p) { - mi_track_free_size(p, oversize); - mi_track_malloc(aligned_p, size, zero); - } - else { - mi_track_resize(aligned_p, oversize, size); - } - #endif - return aligned_p; -} - -// Primitive aligned allocation -static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept -{ - // note: we don't require `size > offset`, we just guarantee that the address at offset is aligned regardless of the allocated size. - mi_assert(alignment > 0); - if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) { // require power-of-two (see ) - #if MI_DEBUG > 0 - _mi_error_message(EOVERFLOW, "aligned allocation requires the alignment to be a power-of-two (size %zu, alignment %zu)\n", size, alignment); - #endif - return NULL; - } - /* - if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { // we cannot align at a boundary larger than this (or otherwise we cannot find segment headers) - #if MI_DEBUG > 0 - _mi_error_message(EOVERFLOW, "aligned allocation has a maximum alignment of %zu (size %zu, alignment %zu)\n", MI_ALIGNMENT_MAX, size, alignment); - #endif - return NULL; - } - */ - if mi_unlikely(size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see ) - #if MI_DEBUG > 0 - _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment); - #endif - return NULL; - } - const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` - const size_t padsize = size + MI_PADDING_SIZE; // note: cannot overflow due to earlier size > PTRDIFF_MAX check - - // try first if there happens to be a small block available with just the right alignment - if mi_likely(padsize <= MI_SMALL_SIZE_MAX && alignment <= padsize) { - mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize); - const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0; - if mi_likely(page->free != NULL && is_aligned) - { - #if MI_STAT>1 - mi_heap_stat_increase(heap, malloc, size); - #endif - void* p = _mi_page_malloc(heap, page, padsize, zero); // TODO: inline _mi_page_malloc - mi_assert_internal(p != NULL); - mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); - mi_track_malloc(p,size,zero); - return p; - } - } - // fallback - return mi_heap_malloc_zero_aligned_at_fallback(heap, size, alignment, offset, zero); -} - - -// ------------------------------------------------------ -// Optimized mi_heap_malloc_aligned / mi_malloc_aligned -// ------------------------------------------------------ - -mi_decl_nodiscard mi_decl_restrict void* 
mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, false); -} - -mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { - #if !MI_PADDING - // without padding, any small sized allocation is naturally aligned (see also `_mi_segment_page_start`) - if (!_mi_is_power_of_two(alignment)) return NULL; - if mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX) - #else - // with padding, we can only guarantee this for fixed alignments - if mi_likely((alignment == sizeof(void*) || (alignment == MI_MAX_ALIGN_SIZE && size > (MI_MAX_ALIGN_SIZE/2))) - && size <= MI_SMALL_SIZE_MAX) - #endif - { - // fast path for common alignment and size - return mi_heap_malloc_small(heap, size); - } - else { - return mi_heap_malloc_aligned_at(heap, size, alignment, 0); - } -} - -// ------------------------------------------------------ -// Aligned Allocation -// ------------------------------------------------------ - -mi_decl_nodiscard mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, true); -} - -mi_decl_nodiscard mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_zalloc_aligned_at(heap, size, alignment, 0); -} - -mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - size_t total; - if (mi_count_size_overflow(count, size, &total)) return NULL; - return mi_heap_zalloc_aligned_at(heap, total, alignment, offset); -} - -mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_calloc_aligned_at(heap,count,size,alignment,0); -} - -mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_malloc_aligned_at(mi_get_default_heap(), size, alignment, offset); -} - -mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_malloc_aligned(mi_get_default_heap(), size, alignment); -} - -mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_zalloc_aligned_at(mi_get_default_heap(), size, alignment, offset); -} - -mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_zalloc_aligned(mi_get_default_heap(), size, alignment); -} - -mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_calloc_aligned_at(mi_get_default_heap(), count, size, alignment, offset); -} - -mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_calloc_aligned(mi_get_default_heap(), count, size, alignment); -} - - -// ------------------------------------------------------ -// Aligned re-allocation -// ------------------------------------------------------ - -static void* 
mi_heap_realloc_zero_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset, bool zero) mi_attr_noexcept { - mi_assert(alignment > 0); - if (alignment <= sizeof(uintptr_t)) return _mi_heap_realloc_zero(heap,p,newsize,zero); - if (p == NULL) return mi_heap_malloc_zero_aligned_at(heap,newsize,alignment,offset,zero); - size_t size = mi_usable_size(p); - if (newsize <= size && newsize >= (size - (size / 2)) - && (((uintptr_t)p + offset) % alignment) == 0) { - return p; // reallocation still fits, is aligned and not more than 50% waste - } - else { - void* newp = mi_heap_malloc_aligned_at(heap,newsize,alignment,offset); - if (newp != NULL) { - if (zero && newsize > size) { - const mi_page_t* page = _mi_ptr_page(newp); - if (page->is_zero) { - // already zero initialized - mi_assert_expensive(mi_mem_is_zero(newp,newsize)); - } - else { - // also set last word in the previous allocation to zero to ensure any padding is zero-initialized - size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); - memset((uint8_t*)newp + start, 0, newsize - start); - } - } - _mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize)); - mi_free(p); // only free if successful - } - return newp; - } -} - -static void* mi_heap_realloc_zero_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, bool zero) mi_attr_noexcept { - mi_assert(alignment > 0); - if (alignment <= sizeof(uintptr_t)) return _mi_heap_realloc_zero(heap,p,newsize,zero); - size_t offset = ((uintptr_t)p % alignment); // use offset of previous allocation (p can be NULL) - return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,zero); -} - -mi_decl_nodiscard void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,false); -} - -mi_decl_nodiscard void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { - return mi_heap_realloc_zero_aligned(heap,p,newsize,alignment,false); -} - -mi_decl_nodiscard void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_realloc_zero_aligned_at(heap, p, newsize, alignment, offset, true); -} - -mi_decl_nodiscard void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { - return mi_heap_realloc_zero_aligned(heap, p, newsize, alignment, true); -} - -mi_decl_nodiscard void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - size_t total; - if (mi_count_size_overflow(newcount, size, &total)) return NULL; - return mi_heap_rezalloc_aligned_at(heap, p, total, alignment, offset); -} - -mi_decl_nodiscard void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { - size_t total; - if (mi_count_size_overflow(newcount, size, &total)) return NULL; - return mi_heap_rezalloc_aligned(heap, p, total, alignment); -} - -mi_decl_nodiscard void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_realloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); -} - -mi_decl_nodiscard void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { - return 
mi_heap_realloc_aligned(mi_get_default_heap(), p, newsize, alignment); -} - -mi_decl_nodiscard void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_rezalloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); -} - -mi_decl_nodiscard void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { - return mi_heap_rezalloc_aligned(mi_get_default_heap(), p, newsize, alignment); -} - -mi_decl_nodiscard void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_recalloc_aligned_at(mi_get_default_heap(), p, newcount, size, alignment, offset); -} - -mi_decl_nodiscard void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_recalloc_aligned(mi_get_default_heap(), p, newcount, size, alignment); -} diff --git a/3rd/mimalloc-2.0.9/src/alloc.c b/3rd/mimalloc-2.0.9/src/alloc.c deleted file mode 100644 index 86453f15..00000000 --- a/3rd/mimalloc-2.0.9/src/alloc.c +++ /dev/null @@ -1,1031 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2018-2022, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ -#ifndef _DEFAULT_SOURCE -#define _DEFAULT_SOURCE // for realpath() on Linux -#endif - -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" - - -#include // memset, strlen -#include // malloc, exit - -#define MI_IN_ALLOC_C -#include "alloc-override.c" -#undef MI_IN_ALLOC_C - -// ------------------------------------------------------ -// Allocation -// ------------------------------------------------------ - -// Fast allocation in a page: just pop from the free list. -// Fall back to generic allocation only if the list is empty. -extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept { - mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size); - mi_block_t* const block = page->free; - if mi_unlikely(block == NULL) { - return _mi_malloc_generic(heap, size, zero, 0); - } - mi_assert_internal(block != NULL && _mi_ptr_page(block) == page); - // pop from the free list - page->used++; - page->free = mi_block_next(page, block); - mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page); - - // allow use of the block internally - // note: when tracking we need to avoid ever touching the MI_PADDING since - // that is tracked by valgrind etc. as non-accessible (through the red-zone, see `mimalloc-track.h`) - mi_track_mem_undefined(block, mi_page_usable_block_size(page)); - - // zero the block? note: we need to zero the full block size (issue #63) - if mi_unlikely(zero) { - mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic) - const size_t zsize = (page->is_zero ? 
sizeof(block->next) + MI_PADDING_SIZE : page->xblock_size); - _mi_memzero_aligned(block, zsize - MI_PADDING_SIZE); - } - -#if (MI_DEBUG>0) && !MI_TRACK_ENABLED - if (!page->is_zero && !zero && !mi_page_is_huge(page)) { - memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page)); - } -#elif (MI_SECURE!=0) - if (!zero) { block->next = 0; } // don't leak internal data -#endif - -#if (MI_STAT>0) - const size_t bsize = mi_page_usable_block_size(page); - if (bsize <= MI_MEDIUM_OBJ_SIZE_MAX) { - mi_heap_stat_increase(heap, normal, bsize); - mi_heap_stat_counter_increase(heap, normal_count, 1); -#if (MI_STAT>1) - const size_t bin = _mi_bin(bsize); - mi_heap_stat_increase(heap, normal_bins[bin], 1); -#endif - } -#endif - -#if (MI_PADDING > 0) && defined(MI_ENCODE_FREELIST) && !MI_TRACK_ENABLED - mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); - ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); - #if (MI_DEBUG>1) - mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta)); - mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess - #endif - padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); - padding->delta = (uint32_t)(delta); - if (!mi_page_is_huge(page)) { - uint8_t* fill = (uint8_t*)padding - delta; - const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes - for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; } - } -#endif - - return block; -} - -static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { - mi_assert(heap != NULL); - #if MI_DEBUG - const uintptr_t tid = _mi_thread_id(); - mi_assert(heap->thread_id == 0 || heap->thread_id == tid); // heaps are thread local - #endif - mi_assert(size <= MI_SMALL_SIZE_MAX); -#if (MI_PADDING) - if (size == 0) { - size = sizeof(void*); - } -#endif - mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE); - void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero); - mi_assert_internal(p == NULL || mi_usable_size(p) >= size); -#if MI_STAT>1 - if (p != NULL) { - if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); } - mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); - } -#endif - mi_track_malloc(p,size,zero); - return p; -} - -// allocate a small block -mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept { - return mi_heap_malloc_small_zero(heap, size, false); -} - -mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept { - return mi_heap_malloc_small(mi_get_default_heap(), size); -} - -// The main allocation function -extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept { - if mi_likely(size <= MI_SMALL_SIZE_MAX) { - mi_assert_internal(huge_alignment == 0); - return mi_heap_malloc_small_zero(heap, size, zero); - } - else { - mi_assert(heap!=NULL); - mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local - void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment); // note: size can overflow but it is detected in malloc_generic - mi_assert_internal(p == NULL || mi_usable_size(p) >= size); - #if 
MI_STAT>1 - if (p != NULL) { - if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); } - mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); - } - #endif - mi_track_malloc(p,size,zero); - return p; - } -} - -extern inline void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { - return _mi_heap_malloc_zero_ex(heap, size, zero, 0); -} - -mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { - return _mi_heap_malloc_zero(heap, size, false); -} - -mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept { - return mi_heap_malloc(mi_get_default_heap(), size); -} - -// zero initialized small block -mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept { - return mi_heap_malloc_small_zero(mi_get_default_heap(), size, true); -} - -mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { - return _mi_heap_malloc_zero(heap, size, true); -} - -mi_decl_nodiscard mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept { - return mi_heap_zalloc(mi_get_default_heap(),size); -} - - -// ------------------------------------------------------ -// Check for double free in secure and debug mode -// This is somewhat expensive so only enabled for secure mode 4 -// ------------------------------------------------------ - -#if (MI_ENCODE_FREELIST && (MI_SECURE>=4 || MI_DEBUG!=0)) -// linear check if the free list contains a specific element -static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, const mi_block_t* elem) { - while (list != NULL) { - if (elem==list) return true; - list = mi_block_next(page, list); - } - return false; -} - -static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { - // The decoded value is in the same page (or NULL). - // Walk the free lists to verify positively if it is already freed - if (mi_list_contains(page, page->free, block) || - mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, mi_page_thread_free(page), block)) - { - _mi_error_message(EAGAIN, "double free detected of block %p with size %zu\n", block, mi_page_block_size(page)); - return true; - } - return false; -} - -#define mi_track_page(page,access) { size_t psize; void* pstart = _mi_page_start(_mi_page_segment(page),page,&psize); mi_track_mem_##access( pstart, psize); } - -static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { - bool is_double_free = false; - mi_block_t* n = mi_block_nextx(page, block, page->keys); // pretend it is freed, and get the decoded first field - if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? - (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? - { - // Suspicous: decoded value a in block is in the same page (or NULL) -- maybe a double free? 
- // (continue in separate function to improve code generation) - is_double_free = mi_check_is_double_freex(page, block); - } - return is_double_free; -} -#else -static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { - MI_UNUSED(page); - MI_UNUSED(block); - return false; -} -#endif - -// --------------------------------------------------------------------------- -// Check for heap block overflow by setting up padding at the end of the block -// --------------------------------------------------------------------------- - -#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST) && !MI_TRACK_ENABLED -static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) { - *bsize = mi_page_usable_block_size(page); - const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize); - mi_track_mem_defined(padding,sizeof(mi_padding_t)); - *delta = padding->delta; - uint32_t canary = padding->canary; - uintptr_t keys[2]; - keys[0] = page->keys[0]; - keys[1] = page->keys[1]; - bool ok = ((uint32_t)mi_ptr_encode(page,block,keys) == canary && *delta <= *bsize); - mi_track_mem_noaccess(padding,sizeof(mi_padding_t)); - return ok; -} - -// Return the exact usable size of a block. -static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { - size_t bsize; - size_t delta; - bool ok = mi_page_decode_padding(page, block, &delta, &bsize); - mi_assert_internal(ok); mi_assert_internal(delta <= bsize); - return (ok ? bsize - delta : 0); -} - -static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) { - size_t bsize; - size_t delta; - bool ok = mi_page_decode_padding(page, block, &delta, &bsize); - *size = *wrong = bsize; - if (!ok) return false; - mi_assert_internal(bsize >= delta); - *size = bsize - delta; - if (!mi_page_is_huge(page)) { - uint8_t* fill = (uint8_t*)block + bsize - delta; - const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes - mi_track_mem_defined(fill, maxpad); - for (size_t i = 0; i < maxpad; i++) { - if (fill[i] != MI_DEBUG_PADDING) { - *wrong = bsize - delta + i; - ok = false; - break; - } - } - mi_track_mem_noaccess(fill, maxpad); - } - return ok; -} - -static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { - size_t size; - size_t wrong; - if (!mi_verify_padding(page,block,&size,&wrong)) { - _mi_error_message(EFAULT, "buffer overflow in heap block %p of size %zu: write after %zu bytes\n", block, size, wrong ); - } -} - -// When a non-thread-local block is freed, it becomes part of the thread delayed free -// list that is freed later by the owning heap. If the exact usable size is too small to -// contain the pointer for the delayed list, then shrink the padding (by decreasing delta) -// so it will later not trigger an overflow error in `mi_free_block`. 
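The removed padding machinery above appends a small `{canary, delta}` descriptor after each block's usable area and fills the unused tail with `MI_DEBUG_PADDING` bytes, so a write past the requested size is caught either by a corrupted canary or by a disturbed fill byte. Below is a minimal stand-alone sketch of that idea, not mimalloc code: `pad_t`, `PAD_FILL`, `PAD_CANARY`, `pad_malloc`, and `pad_ok` are names invented for this illustration, and the canary is a fixed constant rather than a value encoded from per-page keys.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAD_FILL   0xDE          /* plays the role of MI_DEBUG_PADDING */
#define PAD_CANARY 0xCAFEBABEu   /* a real allocator derives this from per-page keys */

typedef struct { uint32_t canary; uint32_t delta; } pad_t;

/* The usable area is `bsize` bytes; the caller asked for `size`, so the last
   `delta = bsize - size` bytes are fill bytes, followed by the descriptor. */
static void* pad_malloc(size_t size, size_t bsize) {
  if (size > bsize) return NULL;
  uint8_t* block = malloc(bsize + sizeof(pad_t));
  if (block == NULL) return NULL;
  memset(block + size, PAD_FILL, bsize - size);
  pad_t* pad = (pad_t*)(void*)(block + bsize);
  pad->canary = PAD_CANARY;
  pad->delta  = (uint32_t)(bsize - size);
  return block;
}

/* True if the canary and all fill bytes are intact (no overflow past `size`). */
static bool pad_ok(const void* p, size_t bsize) {
  const uint8_t* block = (const uint8_t*)p;
  const pad_t* pad = (const pad_t*)(const void*)(block + bsize);
  if (pad->canary != PAD_CANARY || pad->delta > bsize) return false;
  for (uint32_t i = 0; i < pad->delta; i++) {
    if (block[bsize - pad->delta + i] != PAD_FILL) return false;
  }
  return true;
}

int main(void) {
  const size_t bsize = 16;
  uint8_t* p = pad_malloc(10, bsize);
  if (p == NULL) return 1;
  printf("intact: %d\n", pad_ok(p, bsize));  /* 1 */
  p[10] = 0;                                 /* one byte past the requested 10 bytes */
  printf("intact: %d\n", pad_ok(p, bsize));  /* 0: overflow detected */
  free(p);
  return 0;
}
```

mimalloc additionally limits the check to the first `MI_MAX_ALIGN_SIZE` fill bytes and can shrink `delta` when a block moves onto a delayed free list, which is what `mi_padding_shrink` below implements.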
-static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { - size_t bsize; - size_t delta; - bool ok = mi_page_decode_padding(page, block, &delta, &bsize); - mi_assert_internal(ok); - if (!ok || (bsize - delta) >= min_size) return; // usually already enough space - mi_assert_internal(bsize >= min_size); - if (bsize < min_size) return; // should never happen - size_t new_delta = (bsize - min_size); - mi_assert_internal(new_delta < bsize); - mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize); - padding->delta = (uint32_t)new_delta; -} -#else -static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { - MI_UNUSED(page); - MI_UNUSED(block); -} - -static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { - MI_UNUSED(block); - return mi_page_usable_block_size(page); -} - -static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { - MI_UNUSED(page); - MI_UNUSED(block); - MI_UNUSED(min_size); -} -#endif - -// only maintain stats for smaller objects if requested -#if (MI_STAT>0) -static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { - #if (MI_STAT < 2) - MI_UNUSED(block); - #endif - mi_heap_t* const heap = mi_heap_get_default(); - const size_t bsize = mi_page_usable_block_size(page); - #if (MI_STAT>1) - const size_t usize = mi_page_usable_size_of(page, block); - mi_heap_stat_decrease(heap, malloc, usize); - #endif - if (bsize <= MI_MEDIUM_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, normal, bsize); - #if (MI_STAT > 1) - mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1); - #endif - } - else if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, large, bsize); - } - else { - mi_heap_stat_decrease(heap, huge, bsize); - } -} -#else -static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { - MI_UNUSED(page); MI_UNUSED(block); -} -#endif - -#if MI_HUGE_PAGE_ABANDON -#if (MI_STAT>0) -// maintain stats for huge objects -static void mi_stat_huge_free(const mi_page_t* page) { - mi_heap_t* const heap = mi_heap_get_default(); - const size_t bsize = mi_page_block_size(page); // to match stats in `page.c:mi_page_huge_alloc` - if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, large, bsize); - } - else { - mi_heap_stat_decrease(heap, huge, bsize); - } -} -#else -static void mi_stat_huge_free(const mi_page_t* page) { - MI_UNUSED(page); -} -#endif -#endif - -// ------------------------------------------------------ -// Free -// ------------------------------------------------------ - -// multi-threaded free (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON) -static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) -{ - // The padding check may access the non-thread-owned page for the key values. - // that is safe as these are constant and the page won't be freed (as the block is not freed yet). 
- mi_check_padding(page, block); - mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection - - // huge page segments are always abandoned and can be freed immediately - mi_segment_t* segment = _mi_page_segment(page); - if (segment->kind == MI_SEGMENT_HUGE) { - #if MI_HUGE_PAGE_ABANDON - // huge page segments are always abandoned and can be freed immediately - mi_stat_huge_free(page); - _mi_segment_huge_page_free(segment, page, block); - return; - #else - // huge pages are special as they occupy the entire segment - // as these are large we reset the memory occupied by the page so it is available to other threads - // (as the owning thread needs to actually free the memory later). - _mi_segment_huge_page_reset(segment, page, block); - #endif - } - - #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED // note: when tracking, cannot use mi_usable_size with multi-threading - if (segment->kind != MI_SEGMENT_HUGE) { // not for huge segments as we just reset the content - memset(block, MI_DEBUG_FREED, mi_usable_size(block)); - } - #endif - - // Try to put the block on either the page-local thread free list, or the heap delayed free list. - mi_thread_free_t tfreex; - bool use_delayed; - mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free); - do { - use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE); - if mi_unlikely(use_delayed) { - // unlikely: this only happens on the first concurrent free in a page that is in the full list - tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING); - } - else { - // usual: directly add to page thread_free list - mi_block_set_next(page, block, mi_tf_block(tfree)); - tfreex = mi_tf_set_block(tfree,block); - } - } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); - - if mi_unlikely(use_delayed) { - // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) - mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page); - mi_assert_internal(heap != NULL); - if (heap != NULL) { - // add to the delayed free list of this heap. 
(do this atomically as the lock only protects heap memory validity) - mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); - do { - mi_block_set_nextx(heap,block,dfree, heap->keys); - } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block)); - } - - // and reset the MI_DELAYED_FREEING flag - tfree = mi_atomic_load_relaxed(&page->xthread_free); - do { - tfreex = tfree; - mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING); - tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); - } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); - } -} - -// regular free -static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block) -{ - // and push it on the free list - //const size_t bsize = mi_page_block_size(page); - if mi_likely(local) { - // owning thread can free a block directly - if mi_unlikely(mi_check_is_double_free(page, block)) return; - mi_check_padding(page, block); - #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED - if (!mi_page_is_huge(page)) { // huge page content may be already decommitted - memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); - } - #endif - mi_block_set_next(page, block, page->local_free); - page->local_free = block; - page->used--; - if mi_unlikely(mi_page_all_free(page)) { - _mi_page_retire(page); - } - else if mi_unlikely(mi_page_is_in_full(page)) { - _mi_page_unfull(page); - } - } - else { - _mi_free_block_mt(page,block); - } -} - - -// Adjust a block that was allocated aligned, to the actual start of the block in the page. -mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) { - mi_assert_internal(page!=NULL && p!=NULL); - const size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); - const size_t adjust = (diff % mi_page_block_size(page)); - return (mi_block_t*)((uintptr_t)p - adjust); -} - - -void mi_decl_noinline _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept { - mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); - mi_stat_free(page, block); // stat_free may access the padding - mi_track_free(p); - _mi_free_block(page, is_local, block); -} - -// Get the segment data belonging to a pointer -// This is just a single `and` in assembly but does further checks in debug mode -// (and secure mode) if this was a valid pointer. 
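As the comment notes, the pointer-to-segment lookup is a single mask because segments are aligned to the power-of-two `MI_SEGMENT_SIZE`. A stand-alone sketch of that mapping, assuming a 64 MiB segment size (the figure given in the removed `arena.c` comments); `SEGMENT_SIZE` and `segment_of` are names invented for this illustration:

```c
#include <stdint.h>
#include <stdio.h>

/* Stand-in for MI_SEGMENT_SIZE; the removed arena.c comments put it at 64 MiB. */
#define SEGMENT_SIZE ((uintptr_t)1 << 26)

/* The "single and": round an interior pointer down to its segment base. */
static uintptr_t segment_of(const void* p) {
  return (uintptr_t)p & ~(SEGMENT_SIZE - 1);
}

int main(void) {
  int x = 0;
  printf("ptr = %p, segment base = 0x%llx\n",
         (void*)&x, (unsigned long long)segment_of(&x));
  return 0;
}
```

The checks in `mi_checked_ptr_segment` below exist precisely because this mask "finds" a segment base for any pointer whatsoever; in debug and secure builds it is the cookie comparison that validates the result.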
-static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg) -{ - MI_UNUSED(msg); - mi_assert(p != NULL); - -#if (MI_DEBUG>0) - if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) { - _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p); - return NULL; - } -#endif - - mi_segment_t* const segment = _mi_ptr_segment(p); - mi_assert_internal(segment != NULL); - -#if (MI_DEBUG>0) - if mi_unlikely(!mi_is_in_heap_region(p)) { - #if (MI_INTPTR_SIZE == 8 && defined(__linux__)) - if (((uintptr_t)p >> 40) != 0x7F) { // linux tends to align large blocks above 0x7F000000000 (issue #640) - #else - { - #endif - _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n" - "(this may still be a valid very large allocation (over 64MiB))\n", msg, p); - if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) { - _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); - } - } - } -#endif -#if (MI_DEBUG>0 || MI_SECURE>=4) - if mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie) { - _mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p); - return NULL; - } -#endif - - return segment; -} - -// Free a block -// fast path written carefully to prevent spilling on the stack -void mi_free(void* p) mi_attr_noexcept -{ - if mi_unlikely(p == NULL) return; - mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free"); - const bool is_local= (_mi_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); - mi_page_t* const page = _mi_segment_page_of(segment, p); - - if mi_likely(is_local) { // thread-local free? - if mi_likely(page->flags.full_aligned == 0) // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned) - { - mi_block_t* const block = (mi_block_t*)p; - if mi_unlikely(mi_check_is_double_free(page, block)) return; - mi_check_padding(page, block); - mi_stat_free(page, block); - #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED - memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); - #endif - mi_track_free(p); - mi_block_set_next(page, block, page->local_free); - page->local_free = block; - if mi_unlikely(--page->used == 0) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page)) - _mi_page_retire(page); - } - } - else { - // page is full or contains (inner) aligned blocks; use generic path - _mi_free_generic(segment, page, true, p); - } - } - else { - // not thread-local; use generic path - _mi_free_generic(segment, page, false, p); - } -} - -// return true if successful -bool _mi_free_delayed_block(mi_block_t* block) { - // get segment and page - const mi_segment_t* const segment = _mi_ptr_segment(block); - mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); - mi_assert_internal(_mi_thread_id() == segment->thread_id); - mi_page_t* const page = _mi_segment_page_of(segment, block); - - // Clear the no-delayed flag so delayed freeing is used again for this page. - // This must be done before collecting the free lists on this page -- otherwise - // some blocks may end up in the page `thread_free` list with no blocks in the - // heap `thread_delayed_free` list which may cause the page to be never freed! 
- // (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`) - if (!_mi_page_try_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */)) { - return false; - } - - // collect all other non-local frees to ensure up-to-date `used` count - _mi_page_free_collect(page, false); - - // and free the block (possibly freeing the page as well since used is updated) - _mi_free_block(page, true, block); - return true; -} - -// Bytes available in a block -mi_decl_noinline static size_t mi_page_usable_aligned_size_of(const mi_segment_t* segment, const mi_page_t* page, const void* p) mi_attr_noexcept { - const mi_block_t* block = _mi_page_ptr_unalign(segment, page, p); - const size_t size = mi_page_usable_size_of(page, block); - const ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)block; - mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); - return (size - adjust); -} - -static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept { - if (p == NULL) return 0; - const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg); - const mi_page_t* const page = _mi_segment_page_of(segment, p); - if mi_likely(!mi_page_has_aligned(page)) { - const mi_block_t* block = (const mi_block_t*)p; - return mi_page_usable_size_of(page, block); - } - else { - // split out to separate routine for improved code generation - return mi_page_usable_aligned_size_of(segment, page, p); - } -} - -mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept { - return _mi_usable_size(p, "mi_usable_size"); -} - - -// ------------------------------------------------------ -// Allocation extensions -// ------------------------------------------------------ - -void mi_free_size(void* p, size_t size) mi_attr_noexcept { - MI_UNUSED_RELEASE(size); - mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size")); - mi_free(p); -} - -void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept { - MI_UNUSED_RELEASE(alignment); - mi_assert(((uintptr_t)p % alignment) == 0); - mi_free_size(p,size); -} - -void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept { - MI_UNUSED_RELEASE(alignment); - mi_assert(((uintptr_t)p % alignment) == 0); - mi_free(p); -} - -mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { - size_t total; - if (mi_count_size_overflow(count,size,&total)) return NULL; - return mi_heap_zalloc(heap,total); -} - -mi_decl_nodiscard mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { - return mi_heap_calloc(mi_get_default_heap(),count,size); -} - -// Uninitialized `calloc` -mi_decl_nodiscard extern mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { - size_t total; - if (mi_count_size_overflow(count, size, &total)) return NULL; - return mi_heap_malloc(heap, total); -} - -mi_decl_nodiscard mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept { - return mi_heap_mallocn(mi_get_default_heap(),count,size); -} - -// Expand (or shrink) in place (or fail) -void* mi_expand(void* p, size_t newsize) mi_attr_noexcept { - #if MI_PADDING - // we do not shrink/expand with padding enabled - MI_UNUSED(p); MI_UNUSED(newsize); - return NULL; - #else - if (p == NULL) return NULL; - const size_t size = _mi_usable_size(p,"mi_expand"); - if (newsize > size) return NULL; - return p; // it fits - #endif -} - -void* _mi_heap_realloc_zero(mi_heap_t* 
heap, void* p, size_t newsize, bool zero) mi_attr_noexcept { - // if p == NULL then behave as malloc. - // else if size == 0 then reallocate to a zero-sized block (and don't return NULL, just as mi_malloc(0)). - // (this means that returning NULL always indicates an error, and `p` will not have been freed in that case.) - const size_t size = _mi_usable_size(p,"mi_realloc"); // also works if p == NULL (with size 0) - if mi_unlikely(newsize <= size && newsize >= (size / 2) && newsize > 0) { // note: newsize must be > 0 or otherwise we return NULL for realloc(NULL,0) - // todo: adjust potential padding to reflect the new size? - mi_track_free_size(p, size); - mi_track_malloc(p,newsize,true); - return p; // reallocation still fits and not more than 50% waste - } - void* newp = mi_heap_malloc(heap,newsize); - if mi_likely(newp != NULL) { - if (zero && newsize > size) { - // also set last word in the previous allocation to zero to ensure any padding is zero-initialized - const size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); - memset((uint8_t*)newp + start, 0, newsize - start); - } - if mi_likely(p != NULL) { - if mi_likely(_mi_is_aligned(p, sizeof(uintptr_t))) { // a client may pass in an arbitrary pointer `p`.. - const size_t copysize = (newsize > size ? size : newsize); - mi_track_mem_defined(p,copysize); // _mi_useable_size may be too large for byte precise memory tracking.. - _mi_memcpy_aligned(newp, p, copysize); - } - mi_free(p); // only free the original pointer if successful - } - } - return newp; -} - -mi_decl_nodiscard void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { - return _mi_heap_realloc_zero(heap, p, newsize, false); -} - -mi_decl_nodiscard void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { - size_t total; - if (mi_count_size_overflow(count, size, &total)) return NULL; - return mi_heap_realloc(heap, p, total); -} - - -// Reallocate but free `p` on errors -mi_decl_nodiscard void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { - void* newp = mi_heap_realloc(heap, p, newsize); - if (newp==NULL && p!=NULL) mi_free(p); - return newp; -} - -mi_decl_nodiscard void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { - return _mi_heap_realloc_zero(heap, p, newsize, true); -} - -mi_decl_nodiscard void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { - size_t total; - if (mi_count_size_overflow(count, size, &total)) return NULL; - return mi_heap_rezalloc(heap, p, total); -} - - -mi_decl_nodiscard void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept { - return mi_heap_realloc(mi_get_default_heap(),p,newsize); -} - -mi_decl_nodiscard void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept { - return mi_heap_reallocn(mi_get_default_heap(),p,count,size); -} - -// Reallocate but free `p` on errors -mi_decl_nodiscard void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept { - return mi_heap_reallocf(mi_get_default_heap(),p,newsize); -} - -mi_decl_nodiscard void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept { - return mi_heap_rezalloc(mi_get_default_heap(), p, newsize); -} - -mi_decl_nodiscard void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { - return mi_heap_recalloc(mi_get_default_heap(), p, count, size); -} - - - -// ------------------------------------------------------ -// strdup, strndup, and realpath -// 
------------------------------------------------------ - -// `strdup` using mi_malloc -mi_decl_nodiscard mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept { - if (s == NULL) return NULL; - size_t n = strlen(s); - char* t = (char*)mi_heap_malloc(heap,n+1); - if (t == NULL) return NULL; - _mi_memcpy(t, s, n); - t[n] = 0; - return t; -} - -mi_decl_nodiscard mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept { - return mi_heap_strdup(mi_get_default_heap(), s); -} - -// `strndup` using mi_malloc -mi_decl_nodiscard mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept { - if (s == NULL) return NULL; - const char* end = (const char*)memchr(s, 0, n); // find end of string in the first `n` characters (returns NULL if not found) - const size_t m = (end != NULL ? (size_t)(end - s) : n); // `m` is the minimum of `n` or the end-of-string - mi_assert_internal(m <= n); - char* t = (char*)mi_heap_malloc(heap, m+1); - if (t == NULL) return NULL; - _mi_memcpy(t, s, m); - t[m] = 0; - return t; -} - -mi_decl_nodiscard mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { - return mi_heap_strndup(mi_get_default_heap(),s,n); -} - -#ifndef __wasi__ -// `realpath` using mi_malloc -#ifdef _WIN32 -#ifndef PATH_MAX -#define PATH_MAX MAX_PATH -#endif -#include -mi_decl_nodiscard mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { - // todo: use GetFullPathNameW to allow longer file names - char buf[PATH_MAX]; - DWORD res = GetFullPathNameA(fname, PATH_MAX, (resolved_name == NULL ? buf : resolved_name), NULL); - if (res == 0) { - errno = GetLastError(); return NULL; - } - else if (res > PATH_MAX) { - errno = EINVAL; return NULL; - } - else if (resolved_name != NULL) { - return resolved_name; - } - else { - return mi_heap_strndup(heap, buf, PATH_MAX); - } -} -#else -/* -#include // pathconf -static size_t mi_path_max(void) { - static size_t path_max = 0; - if (path_max <= 0) { - long m = pathconf("/",_PC_PATH_MAX); - if (m <= 0) path_max = 4096; // guess - else if (m < 256) path_max = 256; // at least 256 - else path_max = m; - } - return path_max; -} -*/ -char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { - if (resolved_name != NULL) { - return realpath(fname,resolved_name); - } - else { - char* rname = realpath(fname, NULL); - if (rname == NULL) return NULL; - char* result = mi_heap_strdup(heap, rname); - free(rname); // use regular free! (which may be redirected to our free but that's ok) - return result; - } - /* - const size_t n = mi_path_max(); - char* buf = (char*)mi_malloc(n+1); - if (buf == NULL) { - errno = ENOMEM; - return NULL; - } - char* rname = realpath(fname,buf); - char* result = mi_heap_strndup(heap,rname,n); // ok if `rname==NULL` - mi_free(buf); - return result; - } - */ -} -#endif - -mi_decl_nodiscard mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept { - return mi_heap_realpath(mi_get_default_heap(),fname,resolved_name); -} -#endif - -/*------------------------------------------------------- -C++ new and new_aligned -The standard requires calling into `get_new_handler` and -throwing the bad_alloc exception on failure. If we compile -with a C++ compiler we can implement this precisely. If we -use a C compiler we cannot throw a `bad_alloc` exception -but we call `exit` instead (i.e. not returning). 
--------------------------------------------------------*/ - -#ifdef __cplusplus -#include -static bool mi_try_new_handler(bool nothrow) { - #if defined(_MSC_VER) || (__cplusplus >= 201103L) - std::new_handler h = std::get_new_handler(); - #else - std::new_handler h = std::set_new_handler(); - std::set_new_handler(h); - #endif - if (h==NULL) { - _mi_error_message(ENOMEM, "out of memory in 'new'"); - if (!nothrow) { - throw std::bad_alloc(); - } - return false; - } - else { - h(); - return true; - } -} -#else -typedef void (*std_new_handler_t)(void); - -#if (defined(__GNUC__) || (defined(__clang__) && !defined(_MSC_VER))) // exclude clang-cl, see issue #631 -std_new_handler_t __attribute__((weak)) _ZSt15get_new_handlerv(void) { - return NULL; -} -static std_new_handler_t mi_get_new_handler(void) { - return _ZSt15get_new_handlerv(); -} -#else -// note: on windows we could dynamically link to `?get_new_handler@std@@YAP6AXXZXZ`. -static std_new_handler_t mi_get_new_handler() { - return NULL; -} -#endif - -static bool mi_try_new_handler(bool nothrow) { - std_new_handler_t h = mi_get_new_handler(); - if (h==NULL) { - _mi_error_message(ENOMEM, "out of memory in 'new'"); - if (!nothrow) { - abort(); // cannot throw in plain C, use abort - } - return false; - } - else { - h(); - return true; - } -} -#endif - -static mi_decl_noinline void* mi_heap_try_new(mi_heap_t* heap, size_t size, bool nothrow ) { - void* p = NULL; - while(p == NULL && mi_try_new_handler(nothrow)) { - p = mi_heap_malloc(heap,size); - } - return p; -} - -static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow) { - return mi_heap_try_new(mi_get_default_heap(), size, nothrow); -} - - -mi_decl_nodiscard mi_decl_restrict extern inline void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) { - void* p = mi_heap_malloc(heap,size); - if mi_unlikely(p == NULL) return mi_heap_try_new(heap, size, false); - return p; -} - -mi_decl_nodiscard mi_decl_restrict void* mi_new(size_t size) { - return mi_heap_alloc_new(mi_get_default_heap(), size); -} - - -mi_decl_nodiscard mi_decl_restrict extern inline void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) { - size_t total; - if mi_unlikely(mi_count_size_overflow(count, size, &total)) { - mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc - return NULL; - } - else { - return mi_heap_alloc_new(heap,total); - } -} - -mi_decl_nodiscard mi_decl_restrict void* mi_new_n(size_t count, size_t size) { - return mi_heap_alloc_new_n(mi_get_default_heap(), size, count); -} - - -mi_decl_nodiscard mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept { - void* p = mi_malloc(size); - if mi_unlikely(p == NULL) return mi_try_new(size, true); - return p; -} - -mi_decl_nodiscard mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) { - void* p; - do { - p = mi_malloc_aligned(size, alignment); - } - while(p == NULL && mi_try_new_handler(false)); - return p; -} - -mi_decl_nodiscard mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_noexcept { - void* p; - do { - p = mi_malloc_aligned(size, alignment); - } - while(p == NULL && mi_try_new_handler(true)); - return p; -} - -mi_decl_nodiscard void* mi_new_realloc(void* p, size_t newsize) { - void* q; - do { - q = mi_realloc(p, newsize); - } while (q == NULL && mi_try_new_handler(false)); - return q; -} - -mi_decl_nodiscard void* mi_new_reallocn(void* p, size_t newcount, size_t size) { - size_t total; - if 
mi_unlikely(mi_count_size_overflow(newcount, size, &total)) { - mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc - return NULL; - } - else { - return mi_new_realloc(p, total); - } -} - -// ------------------------------------------------------ -// ensure explicit external inline definitions are emitted! -// ------------------------------------------------------ - -#ifdef __cplusplus -void* _mi_externs[] = { - (void*)&_mi_page_malloc, - (void*)&_mi_heap_malloc_zero, - (void*)&_mi_heap_malloc_zero_ex, - (void*)&mi_malloc, - (void*)&mi_malloc_small, - (void*)&mi_zalloc_small, - (void*)&mi_heap_malloc, - (void*)&mi_heap_zalloc, - (void*)&mi_heap_malloc_small, - (void*)&mi_heap_alloc_new, - (void*)&mi_heap_alloc_new_n -}; -#endif diff --git a/3rd/mimalloc-2.0.9/src/arena.c b/3rd/mimalloc-2.0.9/src/arena.c deleted file mode 100644 index 80dd4786..00000000 --- a/3rd/mimalloc-2.0.9/src/arena.c +++ /dev/null @@ -1,536 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2019-2022, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- -"Arenas" are fixed area's of OS memory from which we can allocate -large blocks (>= MI_ARENA_MIN_BLOCK_SIZE, 4MiB). -In contrast to the rest of mimalloc, the arenas are shared between -threads and need to be accessed using atomic operations. - -Currently arenas are only used to for huge OS page (1GiB) reservations, -or direct OS memory reservations -- otherwise it delegates to direct allocation from the OS. -In the future, we can expose an API to manually add more kinds of arenas -which is sometimes needed for embedded devices or shared memory for example. -(We can also employ this with WASI or `sbrk` systems to reserve large arenas - on demand and be able to reuse them efficiently). - -The arena allocation needs to be thread safe and we use an atomic bitmap to allocate. ------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" - -#include // memset -#include // ENOMEM - -#include "bitmap.h" // atomic bitmap - - -// os.c -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* stats); -void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); - -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); -void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); - -bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); - - -/* ----------------------------------------------------------- - Arena allocation ------------------------------------------------------------ */ - -// Block info: bit 0 contains the `in_use` bit, the upper bits the -// size in count of arena blocks. 
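The header comment above describes arenas as fixed, shared areas carved into `MI_ARENA_BLOCK_SIZE` blocks whose ownership is tracked in an atomic bitmap. The sketch below reduces that claiming scheme to a single 64-block arena handing out one block at a time (the real code claims runs of blocks that may cross bitmap fields); `arena_claim` and `arena_release` are invented names, not mimalloc's bitmap API.

```c
#include <stdatomic.h>
#include <stdio.h>

#define BLOCKS 64
static _Atomic unsigned long long inuse;   /* bit i set => block i is claimed */

/* Try to claim one free block; returns its index, or -1 if the arena is full. */
static int arena_claim(void) {
  for (int i = 0; i < BLOCKS; i++) {
    unsigned long long bit = 1ULL << i;
    unsigned long long prev = atomic_fetch_or_explicit(&inuse, bit, memory_order_acq_rel);
    if ((prev & bit) == 0) return i;   /* we flipped the bit from 0 to 1: block i is ours */
    /* the bit was already set by another claimer; try the next block */
  }
  return -1;
}

/* Release a previously claimed block. */
static void arena_release(int i) {
  atomic_fetch_and_explicit(&inuse, ~(1ULL << i), memory_order_release);
}

int main(void) {
  int a = arena_claim();
  int b = arena_claim();
  printf("claimed blocks %d and %d\n", a, b);   /* 0 and 1 */
  arena_release(a);
  arena_release(b);
  return 0;
}
```

Because `atomic_fetch_or` either flips the bit from 0 to 1 or observes it already set, no lock is needed: concurrent claimers that lose the race simply move on to the next free bit.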
-typedef uintptr_t mi_block_info_t; -#define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 64MiB (must be at least MI_SEGMENT_ALIGN) -#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 32MiB -#define MI_MAX_ARENAS (64) // not more than 126 (since we use 7 bits in the memid and an arena index + 1) - -// A memory arena descriptor -typedef struct mi_arena_s { - mi_arena_id_t id; // arena id; 0 for non-specific - bool exclusive; // only allow allocations if specifically for this arena - _Atomic(uint8_t*) start; // the start of the memory area - size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) - size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) - int numa_node; // associated NUMA node - bool is_zero_init; // is the arena zero initialized? - bool allow_decommit; // is decommit allowed? if true, is_large should be false and blocks_committed != NULL - bool is_large; // large- or huge OS pages (always committed) - _Atomic(size_t) search_idx; // optimization to start the search for free blocks - mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? - mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted) - mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) -} mi_arena_t; - - -// The available arenas -static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; -static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0 - - -/* ----------------------------------------------------------- - Arena id's - 0 is used for non-arena's (like OS memory) - id = arena_index + 1 ------------------------------------------------------------ */ - -static size_t mi_arena_id_index(mi_arena_id_t id) { - return (size_t)(id <= 0 ? MI_MAX_ARENAS : id - 1); -} - -static mi_arena_id_t mi_arena_id_create(size_t arena_index) { - mi_assert_internal(arena_index < MI_MAX_ARENAS); - mi_assert_internal(MI_MAX_ARENAS <= 126); - int id = (int)arena_index + 1; - mi_assert_internal(id >= 1 && id <= 127); - return id; -} - -mi_arena_id_t _mi_arena_id_none(void) { - return 0; -} - -static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclusive, mi_arena_id_t req_arena_id) { - return ((!arena_is_exclusive && req_arena_id == _mi_arena_id_none()) || - (arena_id == req_arena_id)); -} - - -/* ----------------------------------------------------------- - Arena allocations get a memory id where the lower 8 bits are - the arena id, and the upper bits the block index. ------------------------------------------------------------ */ - -// Use `0` as a special id for direct OS allocated memory. -#define MI_MEMID_OS 0 - -static size_t mi_arena_memid_create(mi_arena_id_t id, bool exclusive, mi_bitmap_index_t bitmap_index) { - mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow? - mi_assert_internal(id >= 0 && id <= 0x7F); - return ((bitmap_index << 8) | ((uint8_t)id & 0x7F) | (exclusive ? 
0x80 : 0)); -} - -static bool mi_arena_memid_indices(size_t arena_memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { - *bitmap_index = (arena_memid >> 8); - mi_arena_id_t id = (int)(arena_memid & 0x7F); - *arena_index = mi_arena_id_index(id); - return ((arena_memid & 0x80) != 0); -} - -bool _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena_id) { - mi_arena_id_t id = (int)(arena_memid & 0x7F); - bool exclusive = ((arena_memid & 0x80) != 0); - return mi_arena_id_is_suitable(id, exclusive, request_arena_id); -} - -static size_t mi_block_count_of_size(size_t size) { - return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE); -} - -/* ----------------------------------------------------------- - Thread safe allocation in an arena ------------------------------------------------------------ */ -static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) -{ - size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter - if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) { - mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around - return true; - }; - return false; -} - - -/* ----------------------------------------------------------- - Arena Allocation ------------------------------------------------------------ */ - -static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_pinned, bool* is_zero, - mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) -{ - MI_UNUSED(arena_index); - mi_assert_internal(mi_arena_id_index(arena->id) == arena_index); - if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL; - - mi_bitmap_index_t bitmap_index; - if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; - - // claimed it! set the dirty bits (todo: no need for an atomic op here?) 
- void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE); - *memid = mi_arena_memid_create(arena->id, arena->exclusive, bitmap_index); - *is_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); - *large = arena->is_large; - *is_pinned = (arena->is_large || !arena->allow_decommit); - if (arena->blocks_committed == NULL) { - // always committed - *commit = true; - } - else if (*commit) { - // arena not committed as a whole, but commit requested: ensure commit now - bool any_uncommitted; - _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); - if (any_uncommitted) { - bool commit_zero; - _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats); - if (commit_zero) *is_zero = true; - } - } - else { - // no need to commit, but check if already fully committed - *commit = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); - } - return p; -} - -// allocate from an arena with fallback to the OS -static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, - bool* is_pinned, bool* is_zero, - mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld ) -{ - MI_UNUSED_RELEASE(alignment); - mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); - const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); - const size_t bcount = mi_block_count_of_size(size); - if mi_likely(max_arena == 0) return NULL; - mi_assert_internal(size <= bcount * MI_ARENA_BLOCK_SIZE); - - size_t arena_index = mi_arena_id_index(req_arena_id); - if (arena_index < MI_MAX_ARENAS) { - // try a specific arena if requested - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); - if ((arena != NULL) && - (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? - (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - void* p = mi_arena_alloc_from(arena, arena_index, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) return p; - } - } - else { - // try numa affine allocation - for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena == NULL) break; // end reached - if ((arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? - (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) return p; - } - } - - // try from another numa node instead.. - for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena == NULL) break; // end reached - if ((arena->numa_node >= 0 && arena->numa_node != numa_node) && // not numa local! 
- (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) return p; - } - } - } - return NULL; -} - -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, - mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) -{ - mi_assert_internal(commit != NULL && is_pinned != NULL && is_zero != NULL && memid != NULL && tld != NULL); - mi_assert_internal(size > 0); - *memid = MI_MEMID_OS; - *is_zero = false; - *is_pinned = false; - - bool default_large = false; - if (large == NULL) large = &default_large; // ensure `large != NULL` - const int numa_node = _mi_os_numa_node(tld); // current numa node - - // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) - if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { - void* p = mi_arena_allocate(numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); - if (p != NULL) return p; - } - - // finally, fall back to the OS - if (mi_option_is_enabled(mi_option_limit_os_alloc) || req_arena_id != _mi_arena_id_none()) { - errno = ENOMEM; - return NULL; - } - *is_zero = true; - *memid = MI_MEMID_OS; - void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, *commit, large, tld->stats); - if (p != NULL) { *is_pinned = *large; } - return p; -} - -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) -{ - return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); -} - -void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { - if (size != NULL) *size = 0; - size_t arena_index = mi_arena_id_index(arena_id); - if (arena_index >= MI_MAX_ARENAS) return NULL; - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); - if (arena == NULL) return NULL; - if (size != NULL) *size = arena->block_count * MI_ARENA_BLOCK_SIZE; - return arena->start; -} - -/* ----------------------------------------------------------- - Arena free ------------------------------------------------------------ */ - -void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats) { - mi_assert_internal(size > 0 && stats != NULL); - if (p==NULL) return; - if (size==0) return; - - if (memid == MI_MEMID_OS) { - // was a direct OS allocation, pass through - _mi_os_free_aligned(p, size, alignment, align_offset, all_committed, stats); - } - else { - // allocated in an arena - mi_assert_internal(align_offset == 0); - size_t arena_idx; - size_t bitmap_idx; - mi_arena_memid_indices(memid, &arena_idx, &bitmap_idx); - mi_assert_internal(arena_idx < MI_MAX_ARENAS); - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]); - mi_assert_internal(arena != NULL); - const size_t blocks = mi_block_count_of_size(size); - // checks - if (arena == NULL) { - _mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); - return; - } - mi_assert_internal(arena->field_count > mi_bitmap_index_field(bitmap_idx)); - if (arena->field_count <= 
mi_bitmap_index_field(bitmap_idx)) { - _mi_error_message(EINVAL, "trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); - return; - } - // potentially decommit - if (!arena->allow_decommit || arena->blocks_committed == NULL) { - mi_assert_internal(all_committed); // note: may be not true as we may "pretend" to be not committed (in segment.c) - } - else { - mi_assert_internal(arena->blocks_committed != NULL); - _mi_os_decommit(p, blocks * MI_ARENA_BLOCK_SIZE, stats); // ok if this fails - _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); - } - // and make it available to others again - bool all_inuse = _mi_bitmap_unclaim_across(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx); - if (!all_inuse) { - _mi_error_message(EAGAIN, "trying to free an already freed block: %p, size %zu\n", p, size); - return; - }; - } -} - -/* ----------------------------------------------------------- - Add an arena. ------------------------------------------------------------ */ - -static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id) { - mi_assert_internal(arena != NULL); - mi_assert_internal((uintptr_t)mi_atomic_load_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0); - mi_assert_internal(arena->block_count > 0); - if (arena_id != NULL) *arena_id = -1; - - size_t i = mi_atomic_increment_acq_rel(&mi_arena_count); - if (i >= MI_MAX_ARENAS) { - mi_atomic_decrement_acq_rel(&mi_arena_count); - return false; - } - mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena); - arena->id = mi_arena_id_create(i); - if (arena_id != NULL) *arena_id = arena->id; - return true; -} - -bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept -{ - if (arena_id != NULL) *arena_id = _mi_arena_id_none(); - if (size < MI_ARENA_BLOCK_SIZE) return false; - - if (is_large) { - mi_assert_internal(is_committed); - is_committed = true; - } - - const size_t bcount = size / MI_ARENA_BLOCK_SIZE; - const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); - const size_t bitmaps = (is_committed ? 2 : 3); - const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t)); - mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? - if (arena == NULL) return false; - - arena->id = _mi_arena_id_none(); - arena->exclusive = exclusive; - arena->block_count = bcount; - arena->field_count = fields; - arena->start = (uint8_t*)start; - arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) - arena->is_large = is_large; - arena->is_zero_init = is_zero; - arena->allow_decommit = !is_large && !is_committed; // only allow decommit for initially uncommitted memory - arena->search_idx = 0; - arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap - arena->blocks_committed = (!arena->allow_decommit ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap - // the bitmaps are already zero initialized due to os_alloc - // initialize committed bitmap? 
- if (arena->blocks_committed != NULL && is_committed) { - memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning - } - // and claim leftover blocks if needed (so we never allocate there) - ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; - mi_assert_internal(post >= 0); - if (post > 0) { - // don't use leftover bits at the end - mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); - _mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); - } - - return mi_arena_add(arena, arena_id); - -} - -// Reserve a range of regular OS memory -int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept -{ - if (arena_id != NULL) *arena_id = _mi_arena_id_none(); - size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block - bool large = allow_large; - void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &_mi_stats_main); - if (start==NULL) return ENOMEM; - if (!mi_manage_os_memory_ex(start, size, (large || commit), large, true, -1, exclusive, arena_id)) { - _mi_os_free_ex(start, size, commit, &_mi_stats_main); - _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size,1024)); - return ENOMEM; - } - _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size,1024), large ? " (in large os pages)" : ""); - return 0; -} - -bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept { - return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false, NULL); -} - -int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept { - return mi_reserve_os_memory_ex(size, commit, allow_large, false, NULL); -} - - -/* ----------------------------------------------------------- - Debugging ------------------------------------------------------------ */ - -static size_t mi_debug_show_bitmap(const char* prefix, mi_bitmap_field_t* fields, size_t field_count ) { - size_t inuse_count = 0; - for (size_t i = 0; i < field_count; i++) { - char buf[MI_BITMAP_FIELD_BITS + 1]; - uintptr_t field = mi_atomic_load_relaxed(&fields[i]); - for (size_t bit = 0; bit < MI_BITMAP_FIELD_BITS; bit++) { - bool inuse = ((((uintptr_t)1 << bit) & field) != 0); - if (inuse) inuse_count++; - buf[MI_BITMAP_FIELD_BITS - 1 - bit] = (inuse ? 'x' : '.'); - } - buf[MI_BITMAP_FIELD_BITS] = 0; - _mi_verbose_message("%s%s\n", prefix, buf); - } - return inuse_count; -} - -void mi_debug_show_arenas(void) mi_attr_noexcept { - size_t max_arenas = mi_atomic_load_relaxed(&mi_arena_count); - for (size_t i = 0; i < max_arenas; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena == NULL) break; - size_t inuse_count = 0; - _mi_verbose_message("arena %zu: %zu blocks with %zu fields\n", i, arena->block_count, arena->field_count); - inuse_count += mi_debug_show_bitmap(" ", arena->blocks_inuse, arena->field_count); - _mi_verbose_message(" blocks in use ('x'): %zu\n", inuse_count); - } -} - - -/* ----------------------------------------------------------- - Reserve a huge page arena. 
------------------------------------------------------------ */ -// reserve at a specific numa node -int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { - if (arena_id != NULL) *arena_id = -1; - if (pages==0) return 0; - if (numa_node < -1) numa_node = -1; - if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); - size_t hsize = 0; - size_t pages_reserved = 0; - void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize); - if (p==NULL || pages_reserved==0) { - _mi_warning_message("failed to reserve %zu GiB huge pages\n", pages); - return ENOMEM; - } - _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); - - if (!mi_manage_os_memory_ex(p, hsize, true, true, true, numa_node, exclusive, arena_id)) { - _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); - return ENOMEM; - } - return 0; -} - -int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept { - return mi_reserve_huge_os_pages_at_ex(pages, numa_node, timeout_msecs, false, NULL); -} - -// reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected) -int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept { - if (pages == 0) return 0; - - // pages per numa node - size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count()); - if (numa_count <= 0) numa_count = 1; - const size_t pages_per = pages / numa_count; - const size_t pages_mod = pages % numa_count; - const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50); - - // reserve evenly among numa nodes - for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { - size_t node_pages = pages_per; // can be 0 - if (numa_node < pages_mod) node_pages++; - int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per); - if (err) return err; - if (pages < node_pages) { - pages = 0; - } - else { - pages -= node_pages; - } - } - - return 0; -} - -int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { - MI_UNUSED(max_secs); - _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); - if (pages_reserved != NULL) *pages_reserved = 0; - int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0)); - if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; - return err; -} diff --git a/3rd/mimalloc-2.0.9/src/bitmap.c b/3rd/mimalloc-2.0.9/src/bitmap.c deleted file mode 100644 index 4ea9f4af..00000000 --- a/3rd/mimalloc-2.0.9/src/bitmap.c +++ /dev/null @@ -1,414 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2019-2021 Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. 
------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- -Concurrent bitmap that can set/reset sequences of bits atomically, -represeted as an array of fields where each field is a machine word (`size_t`) - -There are two api's; the standard one cannot have sequences that cross -between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS). -(this is used in region allocation) - -The `_across` postfixed functions do allow sequences that can cross over -between the fields. (This is used in arena allocation) ----------------------------------------------------------------------------- */ - -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "bitmap.h" - -/* ----------------------------------------------------------- - Bitmap definition ------------------------------------------------------------ */ - -// The bit mask for a given number of blocks at a specified bit index. -static inline size_t mi_bitmap_mask_(size_t count, size_t bitidx) { - mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); - mi_assert_internal(count > 0); - if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; - if (count == 0) return 0; - return ((((size_t)1 << count) - 1) << bitidx); -} - - -/* ----------------------------------------------------------- - Claim a bit sequence atomically ------------------------------------------------------------ */ - -// Try to atomically claim a sequence of `count` bits in a single -// field at `idx` in `bitmap`. Returns `true` on success. -inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) -{ - mi_assert_internal(bitmap_idx != NULL); - mi_assert_internal(count <= MI_BITMAP_FIELD_BITS); - mi_assert_internal(count > 0); - mi_bitmap_field_t* field = &bitmap[idx]; - size_t map = mi_atomic_load_relaxed(field); - if (map==MI_BITMAP_FIELD_FULL) return false; // short cut - - // search for 0-bit sequence of length count - const size_t mask = mi_bitmap_mask_(count, 0); - const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count; - -#ifdef MI_HAVE_FAST_BITSCAN - size_t bitidx = mi_ctz(~map); // quickly find the first zero bit if possible -#else - size_t bitidx = 0; // otherwise start at 0 -#endif - size_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx - - // scan linearly for a free range of zero bits - while (bitidx <= bitidx_max) { - const size_t mapm = map & m; - if (mapm == 0) { // are the mask bits free at bitidx? - mi_assert_internal((m >> bitidx) == mask); // no overflow? - const size_t newmap = map | m; - mi_assert_internal((newmap^map) >> bitidx == mask); - if (!mi_atomic_cas_weak_acq_rel(field, &map, newmap)) { // TODO: use strong cas here? - // no success, another thread claimed concurrently.. keep going (with updated `map`) - continue; - } - else { - // success, we claimed the bits! - *bitmap_idx = mi_bitmap_index_create(idx, bitidx); - return true; - } - } - else { - // on to the next bit range -#ifdef MI_HAVE_FAST_BITSCAN - const size_t shift = (count == 1 ? 1 : mi_bsr(mapm) - bitidx + 1); - mi_assert_internal(shift > 0 && shift <= count); -#else - const size_t shift = 1; -#endif - bitidx += shift; - m <<= shift; - } - } - // no bits found - return false; -} - -// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. -// Starts at idx, and wraps around to search in all `bitmap_fields` fields. 
-// `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields. -bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { - size_t idx = start_field_idx; - for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { - if (idx >= bitmap_fields) idx = 0; // wrap - if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { - return true; - } - } - return false; -} - -// Like _mi_bitmap_try_find_from_claim but with an extra predicate that must be fullfilled -bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap_fields, - const size_t start_field_idx, const size_t count, - mi_bitmap_pred_fun_t pred_fun, void* pred_arg, - mi_bitmap_index_t* bitmap_idx) { - size_t idx = start_field_idx; - for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { - if (idx >= bitmap_fields) idx = 0; // wrap - if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { - if (pred_fun == NULL || pred_fun(*bitmap_idx, pred_arg)) { - return true; - } - // predicate returned false, unclaim and look further - _mi_bitmap_unclaim(bitmap, bitmap_fields, count, *bitmap_idx); - } - } - return false; -} - -/* -// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. -// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. -bool _mi_bitmap_try_find_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t count, mi_bitmap_index_t* bitmap_idx) { - return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, 0, count, bitmap_idx); -} -*/ - -// Set `count` bits at `bitmap_idx` to 0 atomically -// Returns `true` if all `count` bits were 1 previously. -bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { - const size_t idx = mi_bitmap_index_field(bitmap_idx); - const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - const size_t mask = mi_bitmap_mask_(count, bitidx); - mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); - // mi_assert_internal((bitmap[idx] & mask) == mask); - size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask); - return ((prev & mask) == mask); -} - - -// Set `count` bits at `bitmap_idx` to 1 atomically -// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. -bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) { - const size_t idx = mi_bitmap_index_field(bitmap_idx); - const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - const size_t mask = mi_bitmap_mask_(count, bitidx); - mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); - //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0); - size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask); - if (any_zero != NULL) *any_zero = ((prev & mask) != mask); - return ((prev & mask) == 0); -} - -// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one. 
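The claim/unclaim pair above reduces to a single atomic fetch-OR or fetch-AND followed by a look at the previous value; a minimal C11 sketch of that idiom with hypothetical names:

```
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Set `mask` bits; returns true only if all of them were 0 before
   (mirrors the contract of the removed _mi_bitmap_claim). */
static bool claim_bits(_Atomic size_t* field, size_t mask, bool* any_zero) {
  size_t prev = atomic_fetch_or_explicit(field, mask, memory_order_acq_rel);
  if (any_zero) *any_zero = ((prev & mask) != mask);
  return (prev & mask) == 0;
}

/* Clear `mask` bits; returns true only if all of them were 1 before
   (mirrors the contract of the removed _mi_bitmap_unclaim). */
static bool unclaim_bits(_Atomic size_t* field, size_t mask) {
  size_t prev = atomic_fetch_and_explicit(field, ~mask, memory_order_acq_rel);
  return (prev & mask) == mask;
}

int main(void) {
  _Atomic size_t field = 0;
  bool any_zero;
  printf("claim:   %d\n", claim_bits(&field, 0xF0, &any_zero));  // 1: bits were free
  printf("reclaim: %d\n", claim_bits(&field, 0xF0, &any_zero));  // 0: already set
  printf("unclaim: %d\n", unclaim_bits(&field, 0xF0));           // 1: bits were set
  return 0;
}
```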
-static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) { - const size_t idx = mi_bitmap_index_field(bitmap_idx); - const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - const size_t mask = mi_bitmap_mask_(count, bitidx); - mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); - size_t field = mi_atomic_load_relaxed(&bitmap[idx]); - if (any_ones != NULL) *any_ones = ((field & mask) != 0); - return ((field & mask) == mask); -} - -bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { - return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL); -} - -bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { - bool any_ones; - mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones); - return any_ones; -} - - -//-------------------------------------------------------------------------- -// the `_across` functions work on bitmaps where sequences can cross over -// between the fields. This is used in arena allocation -//-------------------------------------------------------------------------- - -// Try to atomically claim a sequence of `count` bits starting from the field -// at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success. -static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx) -{ - mi_assert_internal(bitmap_idx != NULL); - - // check initial trailing zeros - mi_bitmap_field_t* field = &bitmap[idx]; - size_t map = mi_atomic_load_relaxed(field); - const size_t initial = mi_clz(map); // count of initial zeros starting at idx - mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS); - if (initial == 0) return false; - if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields - if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries - - // scan ahead - size_t found = initial; - size_t mask = 0; // mask bits for the final field - while(found < count) { - field++; - map = mi_atomic_load_relaxed(field); - const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? 
MI_BITMAP_FIELD_BITS : (count - found)); - mask = mi_bitmap_mask_(mask_bits, 0); - if ((map & mask) != 0) return false; - found += mask_bits; - } - mi_assert_internal(field < &bitmap[bitmap_fields]); - - // found range of zeros up to the final field; mask contains mask in the final field - // now claim it atomically - mi_bitmap_field_t* const final_field = field; - const size_t final_mask = mask; - mi_bitmap_field_t* const initial_field = &bitmap[idx]; - const size_t initial_mask = mi_bitmap_mask_(initial, MI_BITMAP_FIELD_BITS - initial); - - // initial field - size_t newmap; - field = initial_field; - map = mi_atomic_load_relaxed(field); - do { - newmap = map | initial_mask; - if ((map & initial_mask) != 0) { goto rollback; }; - } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); - - // intermediate fields - while (++field < final_field) { - newmap = MI_BITMAP_FIELD_FULL; - map = 0; - if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { goto rollback; } - } - - // final field - mi_assert_internal(field == final_field); - map = mi_atomic_load_relaxed(field); - do { - newmap = map | final_mask; - if ((map & final_mask) != 0) { goto rollback; } - } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); - - // claimed! - *bitmap_idx = mi_bitmap_index_create(idx, MI_BITMAP_FIELD_BITS - initial); - return true; - -rollback: - // roll back intermediate fields - while (--field > initial_field) { - newmap = 0; - map = MI_BITMAP_FIELD_FULL; - mi_assert_internal(mi_atomic_load_relaxed(field) == map); - mi_atomic_store_release(field, newmap); - } - if (field == initial_field) { - map = mi_atomic_load_relaxed(field); - do { - mi_assert_internal((map & initial_mask) == initial_mask); - newmap = map & ~initial_mask; - } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); - } - // retry? (we make a recursive call instead of goto to be able to use const declarations) - if (retries < 4) { - return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx); - } - else { - return false; - } -} - - -// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success. -// Starts at idx, and wraps around to search in all `bitmap_fields` fields. 
-bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { - mi_assert_internal(count > 0); - if (count==1) return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx); - size_t idx = start_field_idx; - for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { - if (idx >= bitmap_fields) idx = 0; // wrap - // try to claim inside the field - if (count <= MI_BITMAP_FIELD_BITS) { - if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { - return true; - } - } - // try to claim across fields - if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx)) { - return true; - } - } - return false; -} - -// Helper for masks across fields; returns the mid count, post_mask may be 0 -static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, size_t* pre_mask, size_t* mid_mask, size_t* post_mask) { - MI_UNUSED_RELEASE(bitmap_fields); - const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - if mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS) { - *pre_mask = mi_bitmap_mask_(count, bitidx); - *mid_mask = 0; - *post_mask = 0; - mi_assert_internal(mi_bitmap_index_field(bitmap_idx) < bitmap_fields); - return 0; - } - else { - const size_t pre_bits = MI_BITMAP_FIELD_BITS - bitidx; - mi_assert_internal(pre_bits < count); - *pre_mask = mi_bitmap_mask_(pre_bits, bitidx); - count -= pre_bits; - const size_t mid_count = (count / MI_BITMAP_FIELD_BITS); - *mid_mask = MI_BITMAP_FIELD_FULL; - count %= MI_BITMAP_FIELD_BITS; - *post_mask = (count==0 ? 0 : mi_bitmap_mask_(count, 0)); - mi_assert_internal(mi_bitmap_index_field(bitmap_idx) + mid_count + (count==0 ? 0 : 1) < bitmap_fields); - return mid_count; - } -} - -// Set `count` bits at `bitmap_idx` to 0 atomically -// Returns `true` if all `count` bits were 1 previously. -bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { - size_t idx = mi_bitmap_index_field(bitmap_idx); - size_t pre_mask; - size_t mid_mask; - size_t post_mask; - size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); - bool all_one = true; - mi_bitmap_field_t* field = &bitmap[idx]; - size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); - if ((prev & pre_mask) != pre_mask) all_one = false; - while(mid_count-- > 0) { - prev = mi_atomic_and_acq_rel(field++, ~mid_mask); - if ((prev & mid_mask) != mid_mask) all_one = false; - } - if (post_mask!=0) { - prev = mi_atomic_and_acq_rel(field, ~post_mask); - if ((prev & post_mask) != post_mask) all_one = false; - } - return all_one; -} - -// Set `count` bits at `bitmap_idx` to 1 atomically -// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. 
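The `_across` variants split a run that starts at `bitidx` into a partial leading field, zero or more whole middle fields, and an optional partial trailing field; a standalone sketch of that decomposition (illustrative names only, not the library's helpers):

```
#include <stdio.h>
#include <stddef.h>

#define FIELD_BITS (8 * sizeof(size_t))

/* Decompose a run of `count` bits starting at bit `bitidx` of the first
   field into (pre_bits, full middle fields, post_bits), as the removed
   mi_bitmap_mask_across does with masks. */
static void split_across(size_t bitidx, size_t count) {
  if (bitidx + count <= FIELD_BITS) {
    printf("single field: %zu bits at %zu\n", count, bitidx);
    return;
  }
  const size_t pre_bits  = FIELD_BITS - bitidx;   // rest of the first field
  count -= pre_bits;
  const size_t mid_count = count / FIELD_BITS;    // completely covered fields
  const size_t post_bits = count % FIELD_BITS;    // partial final field (may be 0)
  printf("pre=%zu, mid fields=%zu, post=%zu\n", pre_bits, mid_count, post_bits);
}

int main(void) {
  split_across(60, 200);   // on 64-bit: pre=4, mid=3, post=4
  return 0;
}
```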
-bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) { - size_t idx = mi_bitmap_index_field(bitmap_idx); - size_t pre_mask; - size_t mid_mask; - size_t post_mask; - size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); - bool all_zero = true; - bool any_zero = false; - _Atomic(size_t)*field = &bitmap[idx]; - size_t prev = mi_atomic_or_acq_rel(field++, pre_mask); - if ((prev & pre_mask) != 0) all_zero = false; - if ((prev & pre_mask) != pre_mask) any_zero = true; - while (mid_count-- > 0) { - prev = mi_atomic_or_acq_rel(field++, mid_mask); - if ((prev & mid_mask) != 0) all_zero = false; - if ((prev & mid_mask) != mid_mask) any_zero = true; - } - if (post_mask!=0) { - prev = mi_atomic_or_acq_rel(field, post_mask); - if ((prev & post_mask) != 0) all_zero = false; - if ((prev & post_mask) != post_mask) any_zero = true; - } - if (pany_zero != NULL) *pany_zero = any_zero; - return all_zero; -} - - -// Returns `true` if all `count` bits were 1. -// `any_ones` is `true` if there was at least one bit set to one. -static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) { - size_t idx = mi_bitmap_index_field(bitmap_idx); - size_t pre_mask; - size_t mid_mask; - size_t post_mask; - size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); - bool all_ones = true; - bool any_ones = false; - mi_bitmap_field_t* field = &bitmap[idx]; - size_t prev = mi_atomic_load_relaxed(field++); - if ((prev & pre_mask) != pre_mask) all_ones = false; - if ((prev & pre_mask) != 0) any_ones = true; - while (mid_count-- > 0) { - prev = mi_atomic_load_relaxed(field++); - if ((prev & mid_mask) != mid_mask) all_ones = false; - if ((prev & mid_mask) != 0) any_ones = true; - } - if (post_mask!=0) { - prev = mi_atomic_load_relaxed(field); - if ((prev & post_mask) != post_mask) all_ones = false; - if ((prev & post_mask) != 0) any_ones = true; - } - if (pany_ones != NULL) *pany_ones = any_ones; - return all_ones; -} - -bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { - return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL); -} - -bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { - bool any_ones; - mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones); - return any_ones; -} diff --git a/3rd/mimalloc-2.0.9/src/bitmap.h b/3rd/mimalloc-2.0.9/src/bitmap.h deleted file mode 100644 index 0c501ec1..00000000 --- a/3rd/mimalloc-2.0.9/src/bitmap.h +++ /dev/null @@ -1,111 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2019-2020 Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. 
------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- -Concurrent bitmap that can set/reset sequences of bits atomically, -represeted as an array of fields where each field is a machine word (`size_t`) - -There are two api's; the standard one cannot have sequences that cross -between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS). -(this is used in region allocation) - -The `_across` postfixed functions do allow sequences that can cross over -between the fields. (This is used in arena allocation) ----------------------------------------------------------------------------- */ -#pragma once -#ifndef MI_BITMAP_H -#define MI_BITMAP_H - -/* ----------------------------------------------------------- - Bitmap definition ------------------------------------------------------------ */ - -#define MI_BITMAP_FIELD_BITS (8*MI_SIZE_SIZE) -#define MI_BITMAP_FIELD_FULL (~((size_t)0)) // all bits set - -// An atomic bitmap of `size_t` fields -typedef _Atomic(size_t) mi_bitmap_field_t; -typedef mi_bitmap_field_t* mi_bitmap_t; - -// A bitmap index is the index of the bit in a bitmap. -typedef size_t mi_bitmap_index_t; - -// Create a bit index. -static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) { - mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS); - return (idx*MI_BITMAP_FIELD_BITS) + bitidx; -} - -// Create a bit index. -static inline mi_bitmap_index_t mi_bitmap_index_create_from_bit(size_t full_bitidx) { - return mi_bitmap_index_create(full_bitidx / MI_BITMAP_FIELD_BITS, full_bitidx % MI_BITMAP_FIELD_BITS); -} - -// Get the field index from a bit index. -static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) { - return (bitmap_idx / MI_BITMAP_FIELD_BITS); -} - -// Get the bit index in a bitmap field -static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) { - return (bitmap_idx % MI_BITMAP_FIELD_BITS); -} - -// Get the full bit index -static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) { - return bitmap_idx; -} - -/* ----------------------------------------------------------- - Claim a bit sequence atomically ------------------------------------------------------------ */ - -// Try to atomically claim a sequence of `count` bits in a single -// field at `idx` in `bitmap`. Returns `true` on success. -bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx); - -// Starts at idx, and wraps around to search in all `bitmap_fields` fields. -// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields. -bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx); - -// Like _mi_bitmap_try_find_from_claim but with an extra predicate that must be fullfilled -typedef bool (mi_cdecl *mi_bitmap_pred_fun_t)(mi_bitmap_index_t bitmap_idx, void* pred_arg); -bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_pred_fun_t pred_fun, void* pred_arg, mi_bitmap_index_t* bitmap_idx); - -// Set `count` bits at `bitmap_idx` to 0 atomically -// Returns `true` if all `count` bits were 1 previously. 
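The index helpers above simply pack and unpack a (field, bit-in-field) pair into one integer; the same arithmetic in isolation, with assumed names:

```
#include <assert.h>
#include <stdio.h>
#include <stddef.h>

#define FIELD_BITS (8 * sizeof(size_t))

/* A bitmap index is field_index * FIELD_BITS + bit_in_field,
   matching the removed mi_bitmap_index_* helpers. */
static size_t index_create(size_t field, size_t bit) {
  assert(bit < FIELD_BITS);
  return field * FIELD_BITS + bit;
}
static size_t index_field(size_t idx)        { return idx / FIELD_BITS; }
static size_t index_bit_in_field(size_t idx) { return idx % FIELD_BITS; }

int main(void) {
  size_t idx = index_create(3, 17);
  printf("idx=%zu field=%zu bit=%zu\n", idx, index_field(idx), index_bit_in_field(idx));
  return 0;
}
```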
-bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); - -// Set `count` bits at `bitmap_idx` to 1 atomically -// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. -bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero); - -bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); -bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); - - -//-------------------------------------------------------------------------- -// the `_across` functions work on bitmaps where sequences can cross over -// between the fields. This is used in arena allocation -//-------------------------------------------------------------------------- - -// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success. -// Starts at idx, and wraps around to search in all `bitmap_fields` fields. -bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx); - -// Set `count` bits at `bitmap_idx` to 0 atomically -// Returns `true` if all `count` bits were 1 previously. -bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); - -// Set `count` bits at `bitmap_idx` to 1 atomically -// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. -bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero); - -bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); -bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); - -#endif diff --git a/3rd/mimalloc-2.0.9/src/heap.c b/3rd/mimalloc-2.0.9/src/heap.c deleted file mode 100644 index ac2d042b..00000000 --- a/3rd/mimalloc-2.0.9/src/heap.c +++ /dev/null @@ -1,602 +0,0 @@ -/*---------------------------------------------------------------------------- -Copyright (c) 2018-2021, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" - -#include // memset, memcpy - -#if defined(_MSC_VER) && (_MSC_VER < 1920) -#pragma warning(disable:4204) // non-constant aggregate initializer -#endif - -/* ----------------------------------------------------------- - Helpers ------------------------------------------------------------ */ - -// return `true` if ok, `false` to break -typedef bool (heap_page_visitor_fun)(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2); - -// Visit all pages in a heap; returns `false` if break was called. 
-static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void* arg1, void* arg2) -{ - if (heap==NULL || heap->page_count==0) return 0; - - // visit all pages - #if MI_DEBUG>1 - size_t total = heap->page_count; - #endif - size_t count = 0; - for (size_t i = 0; i <= MI_BIN_FULL; i++) { - mi_page_queue_t* pq = &heap->pages[i]; - mi_page_t* page = pq->first; - while(page != NULL) { - mi_page_t* next = page->next; // save next in case the page gets removed from the queue - mi_assert_internal(mi_page_heap(page) == heap); - count++; - if (!fn(heap, pq, page, arg1, arg2)) return false; - page = next; // and continue - } - } - mi_assert_internal(count == total); - return true; -} - - -#if MI_DEBUG>=2 -static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { - MI_UNUSED(arg1); - MI_UNUSED(arg2); - MI_UNUSED(pq); - mi_assert_internal(mi_page_heap(page) == heap); - mi_segment_t* segment = _mi_page_segment(page); - mi_assert_internal(segment->thread_id == heap->thread_id); - mi_assert_expensive(_mi_page_is_valid(page)); - return true; -} -#endif -#if MI_DEBUG>=3 -static bool mi_heap_is_valid(mi_heap_t* heap) { - mi_assert_internal(heap!=NULL); - mi_heap_visit_pages(heap, &mi_heap_page_is_valid, NULL, NULL); - return true; -} -#endif - - - - -/* ----------------------------------------------------------- - "Collect" pages by migrating `local_free` and `thread_free` - lists and freeing empty pages. This is done when a thread - stops (and in that case abandons pages if there are still - blocks alive) ------------------------------------------------------------ */ - -typedef enum mi_collect_e { - MI_NORMAL, - MI_FORCE, - MI_ABANDON -} mi_collect_t; - - -static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) { - MI_UNUSED(arg2); - MI_UNUSED(heap); - mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL)); - mi_collect_t collect = *((mi_collect_t*)arg_collect); - _mi_page_free_collect(page, collect >= MI_FORCE); - if (mi_page_all_free(page)) { - // no more used blocks, free the page. - // note: this will free retired pages as well. - _mi_page_free(page, pq, collect >= MI_FORCE); - } - else if (collect == MI_ABANDON) { - // still used blocks but the thread is done; abandon the page - _mi_page_abandon(page, pq); - } - return true; // don't break -} - -static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { - MI_UNUSED(arg1); - MI_UNUSED(arg2); - MI_UNUSED(heap); - MI_UNUSED(pq); - _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); - return true; // don't break -} - -static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) -{ - if (heap==NULL || !mi_heap_is_initialized(heap)) return; - - const bool force = collect >= MI_FORCE; - _mi_deferred_free(heap, force); - - // note: never reclaim on collect but leave it to threads that need storage to reclaim - const bool force_main = - #ifdef NDEBUG - collect == MI_FORCE - #else - collect >= MI_FORCE - #endif - && _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim; - - if (force_main) { - // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments. - // if all memory is freed by now, all segments should be freed. 
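`mi_heap_visit_pages` above is a classic intrusive-list visitor: the callback may unlink the current node, so `next` is saved before the call. A generic sketch of the same shape, using my own types rather than mimalloc internals:

```
#include <stdbool.h>
#include <stdio.h>

/* Generic visitor over an intrusive list: save `next` *before* invoking
   the callback, since the callback may unlink or free the node. */
typedef struct node_s { struct node_s* next; int value; } node_t;
typedef bool (visit_fun)(node_t* n, void* arg);   // return false to stop

static bool visit_all(node_t* head, visit_fun* fn, void* arg) {
  for (node_t* n = head; n != NULL; ) {
    node_t* next = n->next;          // saved first: fn may remove n
    if (!fn(n, arg)) return false;   // false == "break"
    n = next;
  }
  return true;
}

static bool print_node(node_t* n, void* arg) {
  (void)arg; printf("%d\n", n->value); return true;
}

int main(void) {
  node_t c = { NULL, 3 }, b = { &c, 2 }, a = { &b, 1 };
  visit_all(&a, &print_node, NULL);
  return 0;
}
```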
- _mi_abandoned_reclaim_all(heap, &heap->tld->segments); - } - - // if abandoning, mark all pages to no longer add to delayed_free - if (collect == MI_ABANDON) { - mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL); - } - - // free all current thread delayed blocks. - // (if abandoning, after this there are no more thread-delayed references into the pages.) - _mi_heap_delayed_free_all(heap); - - // collect retired pages - _mi_heap_collect_retired(heap, force); - - // collect all pages owned by this thread - mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); - mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL ); - - // collect abandoned segments (in particular, decommit expired parts of segments in the abandoned segment list) - // note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment - _mi_abandoned_collect(heap, collect == MI_FORCE /* force? */, &heap->tld->segments); - - // collect segment local caches - if (force) { - _mi_segment_thread_collect(&heap->tld->segments); - } - - // decommit in global segment caches - // note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment - _mi_segment_cache_collect( collect == MI_FORCE, &heap->tld->os); - - // collect regions on program-exit (or shared library unload) - if (force && _mi_is_main_thread() && mi_heap_is_backing(heap)) { - //_mi_mem_collect(&heap->tld->os); - } -} - -void _mi_heap_collect_abandon(mi_heap_t* heap) { - mi_heap_collect_ex(heap, MI_ABANDON); -} - -void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept { - mi_heap_collect_ex(heap, (force ? MI_FORCE : MI_NORMAL)); -} - -void mi_collect(bool force) mi_attr_noexcept { - mi_heap_collect(mi_get_default_heap(), force); -} - - -/* ----------------------------------------------------------- - Heap new ------------------------------------------------------------ */ - -mi_heap_t* mi_heap_get_default(void) { - mi_thread_init(); - return mi_get_default_heap(); -} - -mi_heap_t* mi_heap_get_backing(void) { - mi_heap_t* heap = mi_heap_get_default(); - mi_assert_internal(heap!=NULL); - mi_heap_t* bheap = heap->tld->heap_backing; - mi_assert_internal(bheap!=NULL); - mi_assert_internal(bheap->thread_id == _mi_thread_id()); - return bheap; -} - -mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena( mi_arena_id_t arena_id ) { - mi_heap_t* bheap = mi_heap_get_backing(); - mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode? 
- if (heap==NULL) return NULL; - _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t)); - heap->tld = bheap->tld; - heap->thread_id = _mi_thread_id(); - heap->arena_id = arena_id; - _mi_random_split(&bheap->random, &heap->random); - heap->cookie = _mi_heap_random_next(heap) | 1; - heap->keys[0] = _mi_heap_random_next(heap); - heap->keys[1] = _mi_heap_random_next(heap); - heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe - // push on the thread local heaps list - heap->next = heap->tld->heaps; - heap->tld->heaps = heap; - return heap; -} - -mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { - return mi_heap_new_in_arena(_mi_arena_id_none()); -} - -bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid) { - return _mi_arena_memid_is_suitable(memid, heap->arena_id); -} - -uintptr_t _mi_heap_random_next(mi_heap_t* heap) { - return _mi_random_next(&heap->random); -} - -// zero out the page queues -static void mi_heap_reset_pages(mi_heap_t* heap) { - mi_assert_internal(heap != NULL); - mi_assert_internal(mi_heap_is_initialized(heap)); - // TODO: copy full empty heap instead? - memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct)); -#ifdef MI_MEDIUM_DIRECT - memset(&heap->pages_free_medium, 0, sizeof(heap->pages_free_medium)); -#endif - _mi_memcpy_aligned(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages)); - heap->thread_delayed_free = NULL; - heap->page_count = 0; -} - -// called from `mi_heap_destroy` and `mi_heap_delete` to free the internal heap resources. -static void mi_heap_free(mi_heap_t* heap) { - mi_assert(heap != NULL); - mi_assert_internal(mi_heap_is_initialized(heap)); - if (heap==NULL || !mi_heap_is_initialized(heap)) return; - if (mi_heap_is_backing(heap)) return; // dont free the backing heap - - // reset default - if (mi_heap_is_default(heap)) { - _mi_heap_set_default_direct(heap->tld->heap_backing); - } - - // remove ourselves from the thread local heaps list - // linear search but we expect the number of heaps to be relatively small - mi_heap_t* prev = NULL; - mi_heap_t* curr = heap->tld->heaps; - while (curr != heap && curr != NULL) { - prev = curr; - curr = curr->next; - } - mi_assert_internal(curr == heap); - if (curr == heap) { - if (prev != NULL) { prev->next = heap->next; } - else { heap->tld->heaps = heap->next; } - } - mi_assert_internal(heap->tld->heaps != NULL); - - // and free the used memory - mi_free(heap); -} - - -/* ----------------------------------------------------------- - Heap destroy ------------------------------------------------------------ */ - -static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { - MI_UNUSED(arg1); - MI_UNUSED(arg2); - MI_UNUSED(heap); - MI_UNUSED(pq); - - // ensure no more thread_delayed_free will be added - _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); - - // stats - const size_t bsize = mi_page_block_size(page); - if (bsize > MI_MEDIUM_OBJ_SIZE_MAX) { - if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, large, bsize); - } - else { - mi_heap_stat_decrease(heap, huge, bsize); - } - } -#if (MI_STAT) - _mi_page_free_collect(page, false); // update used count - const size_t inuse = page->used; - if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, normal, bsize * inuse); -#if (MI_STAT>1) - mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], inuse); -#endif - } - mi_heap_stat_decrease(heap, malloc, bsize * inuse); // todo: off for aligned blocks... 
-#endif - - /// pretend it is all free now - mi_assert_internal(mi_page_thread_free(page) == NULL); - page->used = 0; - - // and free the page - // mi_page_free(page,false); - page->next = NULL; - page->prev = NULL; - _mi_segment_page_free(page,false /* no force? */, &heap->tld->segments); - - return true; // keep going -} - -void _mi_heap_destroy_pages(mi_heap_t* heap) { - mi_heap_visit_pages(heap, &_mi_heap_page_destroy, NULL, NULL); - mi_heap_reset_pages(heap); -} - -void mi_heap_destroy(mi_heap_t* heap) { - mi_assert(heap != NULL); - mi_assert(mi_heap_is_initialized(heap)); - mi_assert(heap->no_reclaim); - mi_assert_expensive(mi_heap_is_valid(heap)); - if (heap==NULL || !mi_heap_is_initialized(heap)) return; - if (!heap->no_reclaim) { - // don't free in case it may contain reclaimed pages - mi_heap_delete(heap); - } - else { - // free all pages - _mi_heap_destroy_pages(heap); - mi_heap_free(heap); - } -} - -void _mi_heap_destroy_all(void) { - mi_heap_t* bheap = mi_heap_get_backing(); - mi_heap_t* curr = bheap->tld->heaps; - while (curr != NULL) { - mi_heap_t* next = curr->next; - if (curr->no_reclaim) { - mi_heap_destroy(curr); - } - else { - _mi_heap_destroy_pages(curr); - } - curr = next; - } -} - -/* ----------------------------------------------------------- - Safe Heap delete ------------------------------------------------------------ */ - -// Transfer the pages from one heap to the other -static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { - mi_assert_internal(heap!=NULL); - if (from==NULL || from->page_count == 0) return; - - // reduce the size of the delayed frees - _mi_heap_delayed_free_partial(from); - - // transfer all pages by appending the queues; this will set a new heap field - // so threads may do delayed frees in either heap for a while. - // note: appending waits for each page to not be in the `MI_DELAYED_FREEING` state - // so after this only the new heap will get delayed frees - for (size_t i = 0; i <= MI_BIN_FULL; i++) { - mi_page_queue_t* pq = &heap->pages[i]; - mi_page_queue_t* append = &from->pages[i]; - size_t pcount = _mi_page_queue_append(heap, pq, append); - heap->page_count += pcount; - from->page_count -= pcount; - } - mi_assert_internal(from->page_count == 0); - - // and do outstanding delayed frees in the `from` heap - // note: be careful here as the `heap` field in all those pages no longer point to `from`, - // turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls a - // the regular `_mi_free_delayed_block` which is safe. - _mi_heap_delayed_free_all(from); - #if !defined(_MSC_VER) || (_MSC_VER > 1900) // somehow the following line gives an error in VS2015, issue #353 - mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_block_t,&from->thread_delayed_free) == NULL); - #endif - - // and reset the `from` heap - mi_heap_reset_pages(from); -} - -// Safe delete a heap without freeing any still allocated blocks in that heap. 
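Seen through the public API, the absorb-on-delete behaviour above (and the `mi_heap_delete` definition that follows) means live blocks survive `mi_heap_delete` but not `mi_heap_destroy`; a usage sketch assuming the usual `mimalloc.h` heap functions:

```
#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  mi_heap_t* heap = mi_heap_new();
  char* keep = (char*)mi_heap_malloc(heap, 64);   // block we want to outlive the heap
  char* temp = (char*)mi_heap_malloc(heap, 64);   // block we only need briefly

  // mi_heap_delete: the heap object goes away, but its pages (and thus
  // `keep` and `temp`) are absorbed by the backing heap and stay valid.
  mi_heap_delete(heap);
  snprintf(keep, 64, "still valid");
  printf("%s\n", keep);

  mi_free(temp);
  mi_free(keep);

  // mi_heap_destroy would instead free *all* blocks of the heap at once;
  // only safe when nothing still points into that heap.
  return 0;
}
```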
-void mi_heap_delete(mi_heap_t* heap) -{ - mi_assert(heap != NULL); - mi_assert(mi_heap_is_initialized(heap)); - mi_assert_expensive(mi_heap_is_valid(heap)); - if (heap==NULL || !mi_heap_is_initialized(heap)) return; - - if (!mi_heap_is_backing(heap)) { - // tranfer still used pages to the backing heap - mi_heap_absorb(heap->tld->heap_backing, heap); - } - else { - // the backing heap abandons its pages - _mi_heap_collect_abandon(heap); - } - mi_assert_internal(heap->page_count==0); - mi_heap_free(heap); -} - -mi_heap_t* mi_heap_set_default(mi_heap_t* heap) { - mi_assert(heap != NULL); - mi_assert(mi_heap_is_initialized(heap)); - if (heap==NULL || !mi_heap_is_initialized(heap)) return NULL; - mi_assert_expensive(mi_heap_is_valid(heap)); - mi_heap_t* old = mi_get_default_heap(); - _mi_heap_set_default_direct(heap); - return old; -} - - - - -/* ----------------------------------------------------------- - Analysis ------------------------------------------------------------ */ - -// static since it is not thread safe to access heaps from other threads. -static mi_heap_t* mi_heap_of_block(const void* p) { - if (p == NULL) return NULL; - mi_segment_t* segment = _mi_ptr_segment(p); - bool valid = (_mi_ptr_cookie(segment) == segment->cookie); - mi_assert_internal(valid); - if mi_unlikely(!valid) return NULL; - return mi_page_heap(_mi_segment_page_of(segment,p)); -} - -bool mi_heap_contains_block(mi_heap_t* heap, const void* p) { - mi_assert(heap != NULL); - if (heap==NULL || !mi_heap_is_initialized(heap)) return false; - return (heap == mi_heap_of_block(p)); -} - - -static bool mi_heap_page_check_owned(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* p, void* vfound) { - MI_UNUSED(heap); - MI_UNUSED(pq); - bool* found = (bool*)vfound; - mi_segment_t* segment = _mi_page_segment(page); - void* start = _mi_page_start(segment, page, NULL); - void* end = (uint8_t*)start + (page->capacity * mi_page_block_size(page)); - *found = (p >= start && p < end); - return (!*found); // continue if not found -} - -bool mi_heap_check_owned(mi_heap_t* heap, const void* p) { - mi_assert(heap != NULL); - if (heap==NULL || !mi_heap_is_initialized(heap)) return false; - if (((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) return false; // only aligned pointers - bool found = false; - mi_heap_visit_pages(heap, &mi_heap_page_check_owned, (void*)p, &found); - return found; -} - -bool mi_check_owned(const void* p) { - return mi_heap_check_owned(mi_get_default_heap(), p); -} - -/* ----------------------------------------------------------- - Visit all heap blocks and areas - Todo: enable visiting abandoned pages, and - enable visiting all blocks of all heaps across threads ------------------------------------------------------------ */ - -// Separate struct to keep `mi_page_t` out of the public interface -typedef struct mi_heap_area_ex_s { - mi_heap_area_t area; - mi_page_t* page; -} mi_heap_area_ex_t; - -static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_visit_fun* visitor, void* arg) { - mi_assert(xarea != NULL); - if (xarea==NULL) return true; - const mi_heap_area_t* area = &xarea->area; - mi_page_t* page = xarea->page; - mi_assert(page != NULL); - if (page == NULL) return true; - - _mi_page_free_collect(page,true); - mi_assert_internal(page->local_free == NULL); - if (page->used == 0) return true; - - const size_t bsize = mi_page_block_size(page); - const size_t ubsize = mi_page_usable_block_size(page); // without padding - size_t psize; - uint8_t* pstart = 
_mi_page_start(_mi_page_segment(page), page, &psize); - - if (page->capacity == 1) { - // optimize page with one block - mi_assert_internal(page->used == 1 && page->free == NULL); - return visitor(mi_page_heap(page), area, pstart, ubsize, arg); - } - - // create a bitmap of free blocks. - #define MI_MAX_BLOCKS (MI_SMALL_PAGE_SIZE / sizeof(void*)) - uintptr_t free_map[MI_MAX_BLOCKS / sizeof(uintptr_t)]; - memset(free_map, 0, sizeof(free_map)); - - size_t free_count = 0; - for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) { - free_count++; - mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize)); - size_t offset = (uint8_t*)block - pstart; - mi_assert_internal(offset % bsize == 0); - size_t blockidx = offset / bsize; // Todo: avoid division? - mi_assert_internal( blockidx < MI_MAX_BLOCKS); - size_t bitidx = (blockidx / sizeof(uintptr_t)); - size_t bit = blockidx - (bitidx * sizeof(uintptr_t)); - free_map[bitidx] |= ((uintptr_t)1 << bit); - } - mi_assert_internal(page->capacity == (free_count + page->used)); - - // walk through all blocks skipping the free ones - size_t used_count = 0; - for (size_t i = 0; i < page->capacity; i++) { - size_t bitidx = (i / sizeof(uintptr_t)); - size_t bit = i - (bitidx * sizeof(uintptr_t)); - uintptr_t m = free_map[bitidx]; - if (bit == 0 && m == UINTPTR_MAX) { - i += (sizeof(uintptr_t) - 1); // skip a run of free blocks - } - else if ((m & ((uintptr_t)1 << bit)) == 0) { - used_count++; - uint8_t* block = pstart + (i * bsize); - if (!visitor(mi_page_heap(page), area, block, ubsize, arg)) return false; - } - } - mi_assert_internal(page->used == used_count); - return true; -} - -typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_ex_t* area, void* arg); - - -static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* vfun, void* arg) { - MI_UNUSED(heap); - MI_UNUSED(pq); - mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun; - mi_heap_area_ex_t xarea; - const size_t bsize = mi_page_block_size(page); - const size_t ubsize = mi_page_usable_block_size(page); - xarea.page = page; - xarea.area.reserved = page->reserved * bsize; - xarea.area.committed = page->capacity * bsize; - xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL); - xarea.area.used = page->used; // number of blocks in use (#553) - xarea.area.block_size = ubsize; - xarea.area.full_block_size = bsize; - return fun(heap, &xarea, arg); -} - -// Visit all heap pages as areas -static bool mi_heap_visit_areas(const mi_heap_t* heap, mi_heap_area_visit_fun* visitor, void* arg) { - if (visitor == NULL) return false; - return mi_heap_visit_pages((mi_heap_t*)heap, &mi_heap_visit_areas_page, (void*)(visitor), arg); // note: function pointer to void* :-{ -} - -// Just to pass arguments -typedef struct mi_visit_blocks_args_s { - bool visit_blocks; - mi_block_visit_fun* visitor; - void* arg; -} mi_visit_blocks_args_t; - -static bool mi_heap_area_visitor(const mi_heap_t* heap, const mi_heap_area_ex_t* xarea, void* arg) { - mi_visit_blocks_args_t* args = (mi_visit_blocks_args_t*)arg; - if (!args->visitor(heap, &xarea->area, NULL, xarea->area.block_size, args->arg)) return false; - if (args->visit_blocks) { - return mi_heap_area_visit_blocks(xarea, args->visitor, args->arg); - } - else { - return true; - } -} - -// Visit all blocks in a heap -bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) { - 
mi_visit_blocks_args_t args = { visit_blocks, visitor, arg }; - return mi_heap_visit_areas(heap, &mi_heap_area_visitor, &args); -} diff --git a/3rd/mimalloc-2.0.9/src/init.c b/3rd/mimalloc-2.0.9/src/init.c deleted file mode 100644 index c416208c..00000000 --- a/3rd/mimalloc-2.0.9/src/init.c +++ /dev/null @@ -1,716 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2018-2022, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc-internal.h" - -#include // memcpy, memset -#include // atexit - -// Empty page used to initialize the small free pages array -const mi_page_t _mi_page_empty = { - 0, false, false, false, false, - 0, // capacity - 0, // reserved capacity - { 0 }, // flags - false, // is_zero - 0, // retire_expire - NULL, // free - 0, // used - 0, // xblock_size - NULL, // local_free - #if MI_ENCODE_FREELIST - { 0, 0 }, - #endif - MI_ATOMIC_VAR_INIT(0), // xthread_free - MI_ATOMIC_VAR_INIT(0), // xheap - NULL, NULL - #if MI_INTPTR_SIZE==8 - , { 0 } // padding - #endif -}; - -#define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) - -#if (MI_PADDING>0) && (MI_INTPTR_SIZE >= 8) -#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } -#elif (MI_PADDING>0) -#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } -#else -#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY() } -#endif - - -// Empty page queues for every bin -#define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) } -#define MI_PAGE_QUEUES_EMPTY \ - { QNULL(1), \ - QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \ - QNULL( 10), QNULL( 12), QNULL( 14), QNULL( 16), QNULL( 20), QNULL( 24), QNULL( 28), QNULL( 32), /* 16 */ \ - QNULL( 40), QNULL( 48), QNULL( 56), QNULL( 64), QNULL( 80), QNULL( 96), QNULL( 112), QNULL( 128), /* 24 */ \ - QNULL( 160), QNULL( 192), QNULL( 224), QNULL( 256), QNULL( 320), QNULL( 384), QNULL( 448), QNULL( 512), /* 32 */ \ - QNULL( 640), QNULL( 768), QNULL( 896), QNULL( 1024), QNULL( 1280), QNULL( 1536), QNULL( 1792), QNULL( 2048), /* 40 */ \ - QNULL( 2560), QNULL( 3072), QNULL( 3584), QNULL( 4096), QNULL( 5120), QNULL( 6144), QNULL( 7168), QNULL( 8192), /* 48 */ \ - QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \ - QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \ - QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), QNULL(393216), QNULL(458752), QNULL(524288), /* 72 */ \ - QNULL(MI_MEDIUM_OBJ_WSIZE_MAX + 1 /* 655360, Huge queue */), \ - QNULL(MI_MEDIUM_OBJ_WSIZE_MAX + 2) /* Full queue */ } - -#define MI_STAT_COUNT_NULL() {0,0,0,0} - -// Empty statistics -#if MI_STAT>1 -#define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT32(MI_STAT_COUNT_NULL) } -#else -#define MI_STAT_COUNT_END_NULL() -#endif - -#define MI_STATS_NULL \ - MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ - MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ - MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ - MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ 
- MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ - MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ - MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ - { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ - { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \ - MI_STAT_COUNT_END_NULL() - - -// Empty slice span queues for every bin -#define SQNULL(sz) { NULL, NULL, sz } -#define MI_SEGMENT_SPAN_QUEUES_EMPTY \ - { SQNULL(1), \ - SQNULL( 1), SQNULL( 2), SQNULL( 3), SQNULL( 4), SQNULL( 5), SQNULL( 6), SQNULL( 7), SQNULL( 10), /* 8 */ \ - SQNULL( 12), SQNULL( 14), SQNULL( 16), SQNULL( 20), SQNULL( 24), SQNULL( 28), SQNULL( 32), SQNULL( 40), /* 16 */ \ - SQNULL( 48), SQNULL( 56), SQNULL( 64), SQNULL( 80), SQNULL( 96), SQNULL( 112), SQNULL( 128), SQNULL( 160), /* 24 */ \ - SQNULL( 192), SQNULL( 224), SQNULL( 256), SQNULL( 320), SQNULL( 384), SQNULL( 448), SQNULL( 512), SQNULL( 640), /* 32 */ \ - SQNULL( 768), SQNULL( 896), SQNULL( 1024) /* 35 */ } - - -// -------------------------------------------------------- -// Statically allocate an empty heap as the initial -// thread local value for the default heap, -// and statically allocate the backing heap for the main -// thread so it can function without doing any allocation -// itself (as accessing a thread local for the first time -// may lead to allocation itself on some platforms) -// -------------------------------------------------------- - -mi_decl_cache_align const mi_heap_t _mi_heap_empty = { - NULL, - MI_SMALL_PAGES_EMPTY, - MI_PAGE_QUEUES_EMPTY, - MI_ATOMIC_VAR_INIT(NULL), - 0, // tid - 0, // cookie - 0, // arena id - { 0, 0 }, // keys - { {0}, {0}, 0, true }, // random - 0, // page count - MI_BIN_FULL, 0, // page retired min/max - NULL, // next - false -}; - -#define tld_empty_stats ((mi_stats_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,stats))) -#define tld_empty_os ((mi_os_tld_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,os))) - -mi_decl_cache_align static const mi_tld_t tld_empty = { - 0, - false, - NULL, NULL, - { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, tld_empty_stats, tld_empty_os }, // segments - { 0, tld_empty_stats }, // os - { MI_STATS_NULL } // stats -}; - -// the thread-local default heap for allocation -mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; - -extern mi_heap_t _mi_heap_main; - -static mi_tld_t tld_main = { - 0, false, - &_mi_heap_main, & _mi_heap_main, - { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, &tld_main.stats, &tld_main.os }, // segments - { 0, &tld_main.stats }, // os - { MI_STATS_NULL } // stats -}; - -mi_heap_t _mi_heap_main = { - &tld_main, - MI_SMALL_PAGES_EMPTY, - MI_PAGE_QUEUES_EMPTY, - MI_ATOMIC_VAR_INIT(NULL), - 0, // thread id - 0, // initial cookie - 0, // arena id - { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) - { {0x846ca68b}, {0}, 0, true }, // random - 0, // page count - MI_BIN_FULL, 0, // page retired min/max - NULL, // next heap - false // can reclaim -}; - -bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`. 
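The comment above explains why the default heap initially points at a statically allocated empty heap: the first access to a thread-local may itself allocate on some platforms. A generic sketch of that sentinel-then-swap pattern, with invented names:

```
#include <stdio.h>

/* Sentinel-then-swap: the thread-local handle starts at a statically
   allocated "empty" object so the very first use never has to allocate;
   real state is installed lazily on first real use. */
typedef struct state_s { int initialized; int counter; } state_t;

static const state_t state_empty = { 0, 0 };              // sentinel, never written
static _Thread_local state_t* tls_state = (state_t*)&state_empty;

static state_t real_state;                                // stand-in for a real allocation

static state_t* get_state(void) {
  if (!tls_state->initialized) {                          // still the empty sentinel?
    real_state.initialized = 1;
    real_state.counter = 0;
    tls_state = &real_state;                              // install the real state
  }
  return tls_state;
}

int main(void) {
  get_state()->counter++;
  printf("counter=%d\n", get_state()->counter);           // 1
  return 0;
}
```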
- -mi_stats_t _mi_stats_main = { MI_STATS_NULL }; - - -static void mi_heap_main_init(void) { - if (_mi_heap_main.cookie == 0) { - _mi_heap_main.thread_id = _mi_thread_id(); - _mi_heap_main.cookie = 1; - #if defined(_WIN32) && !defined(MI_SHARED_LIB) - _mi_random_init_weak(&_mi_heap_main.random); // prevent allocation failure during bcrypt dll initialization with static linking - #else - _mi_random_init(&_mi_heap_main.random); - #endif - _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); - _mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main); - _mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main); - } -} - -mi_heap_t* _mi_heap_main_get(void) { - mi_heap_main_init(); - return &_mi_heap_main; -} - - -/* ----------------------------------------------------------- - Initialization and freeing of the thread local heaps ------------------------------------------------------------ */ - -// note: in x64 in release build `sizeof(mi_thread_data_t)` is under 4KiB (= OS page size). -typedef struct mi_thread_data_s { - mi_heap_t heap; // must come first due to cast in `_mi_heap_done` - mi_tld_t tld; -} mi_thread_data_t; - - -// Thread meta-data is allocated directly from the OS. For -// some programs that do not use thread pools and allocate and -// destroy many OS threads, this may causes too much overhead -// per thread so we maintain a small cache of recently freed metadata. - -#define TD_CACHE_SIZE (8) -static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE]; - -static mi_thread_data_t* mi_thread_data_alloc(void) { - // try to find thread metadata in the cache - mi_thread_data_t* td; - for (int i = 0; i < TD_CACHE_SIZE; i++) { - td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); - if (td != NULL) { - td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); - if (td != NULL) { - return td; - } - } - } - // if that fails, allocate directly from the OS - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); - if (td == NULL) { - // if this fails, try once more. 
(issue #257) - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); - if (td == NULL) { - // really out of memory - _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); - } - } - return td; -} - -static void mi_thread_data_free( mi_thread_data_t* tdfree ) { - // try to add the thread metadata to the cache - for (int i = 0; i < TD_CACHE_SIZE; i++) { - mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); - if (td == NULL) { - mi_thread_data_t* expected = NULL; - if (mi_atomic_cas_ptr_weak_acq_rel(mi_thread_data_t, &td_cache[i], &expected, tdfree)) { - return; - } - } - } - // if that fails, just free it directly - _mi_os_free(tdfree, sizeof(mi_thread_data_t), &_mi_stats_main); -} - -static void mi_thread_data_collect(void) { - // free all thread metadata from the cache - for (int i = 0; i < TD_CACHE_SIZE; i++) { - mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); - if (td != NULL) { - td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); - if (td != NULL) { - _mi_os_free( td, sizeof(mi_thread_data_t), &_mi_stats_main ); - } - } - } -} - -// Initialize the thread local default heap, called from `mi_thread_init` -static bool _mi_heap_init(void) { - if (mi_heap_is_initialized(mi_get_default_heap())) return true; - if (_mi_is_main_thread()) { - // mi_assert_internal(_mi_heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization - // the main heap is statically allocated - mi_heap_main_init(); - _mi_heap_set_default_direct(&_mi_heap_main); - //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap()); - } - else { - // use `_mi_os_alloc` to allocate directly from the OS - mi_thread_data_t* td = mi_thread_data_alloc(); - if (td == NULL) return false; - - // OS allocated so already zero initialized - mi_tld_t* tld = &td->tld; - mi_heap_t* heap = &td->heap; - _mi_memcpy_aligned(tld, &tld_empty, sizeof(*tld)); - _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(*heap)); - heap->thread_id = _mi_thread_id(); - _mi_random_init(&heap->random); - heap->cookie = _mi_heap_random_next(heap) | 1; - heap->keys[0] = _mi_heap_random_next(heap); - heap->keys[1] = _mi_heap_random_next(heap); - heap->tld = tld; - tld->heap_backing = heap; - tld->heaps = heap; - tld->segments.stats = &tld->stats; - tld->segments.os = &tld->os; - tld->os.stats = &tld->stats; - _mi_heap_set_default_direct(heap); - } - return false; -} - -// Free the thread local default heap (called from `mi_thread_done`) -static bool _mi_heap_done(mi_heap_t* heap) { - if (!mi_heap_is_initialized(heap)) return true; - - // reset default heap - _mi_heap_set_default_direct(_mi_is_main_thread() ? 
&_mi_heap_main : (mi_heap_t*)&_mi_heap_empty); - - // switch to backing heap - heap = heap->tld->heap_backing; - if (!mi_heap_is_initialized(heap)) return false; - - // delete all non-backing heaps in this thread - mi_heap_t* curr = heap->tld->heaps; - while (curr != NULL) { - mi_heap_t* next = curr->next; // save `next` as `curr` will be freed - if (curr != heap) { - mi_assert_internal(!mi_heap_is_backing(curr)); - mi_heap_delete(curr); - } - curr = next; - } - mi_assert_internal(heap->tld->heaps == heap && heap->next == NULL); - mi_assert_internal(mi_heap_is_backing(heap)); - - // collect if not the main thread - if (heap != &_mi_heap_main) { - _mi_heap_collect_abandon(heap); - } - - // merge stats - _mi_stats_done(&heap->tld->stats); - - // free if not the main thread - if (heap != &_mi_heap_main) { - // the following assertion does not always hold for huge segments as those are always treated - // as abondened: one may allocate it in one thread, but deallocate in another in which case - // the count can be too large or negative. todo: perhaps not count huge segments? see issue #363 - // mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id()); - mi_thread_data_free((mi_thread_data_t*)heap); - } - else { - mi_thread_data_collect(); // free cached thread metadata - #if 0 - // never free the main thread even in debug mode; if a dll is linked statically with mimalloc, - // there may still be delete/free calls after the mi_fls_done is called. Issue #207 - _mi_heap_destroy_pages(heap); - mi_assert_internal(heap->tld->heap_backing == &_mi_heap_main); - #endif - } - return false; -} - - - -// -------------------------------------------------------- -// Try to run `mi_thread_done()` automatically so any memory -// owned by the thread but not yet released can be abandoned -// and re-owned by another thread. -// -// 1. windows dynamic library: -// call from DllMain on DLL_THREAD_DETACH -// 2. windows static library: -// use `FlsAlloc` to call a destructor when the thread is done -// 3. unix, pthreads: -// use a pthread key to call a destructor when a pthread is done -// -// In the last two cases we also need to call `mi_process_init` -// to set up the thread local keys. 
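On the pthreads path described in the comment above, the per-thread shutdown hook is simply a pthread key destructor; a minimal standalone example of that mechanism (names are mine, not mimalloc's):

```
#include <pthread.h>
#include <stdio.h>

/* A pthread key destructor runs when a thread that set the key exits,
   which is how the removed init.c arranges for mi_thread_done. */
static pthread_key_t cleanup_key;

static void on_thread_done(void* value) {
  printf("thread-done hook for %s\n", (const char*)value);
}

static void* worker(void* arg) {
  pthread_setspecific(cleanup_key, arg);   // non-NULL => destructor will run
  return NULL;
}

int main(void) {
  pthread_key_create(&cleanup_key, &on_thread_done);
  pthread_t t;
  pthread_create(&t, NULL, &worker, "worker thread");
  pthread_join(t, NULL);                   // destructor has run by now
  pthread_key_delete(cleanup_key);
  return 0;
}
```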
-// -------------------------------------------------------- - -static void _mi_thread_done(mi_heap_t* default_heap); - -#if defined(_WIN32) && defined(MI_SHARED_LIB) - // nothing to do as it is done in DllMain -#elif defined(_WIN32) && !defined(MI_SHARED_LIB) - // use thread local storage keys to detect thread ending - #include - #include - #if (_WIN32_WINNT < 0x600) // before Windows Vista - WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback ); - WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex ); - WINBASEAPI BOOL WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData ); - WINBASEAPI BOOL WINAPI FlsFree(_In_ DWORD dwFlsIndex); - #endif - static DWORD mi_fls_key = (DWORD)(-1); - static void NTAPI mi_fls_done(PVOID value) { - mi_heap_t* heap = (mi_heap_t*)value; - if (heap != NULL) { - _mi_thread_done(heap); - FlsSetValue(mi_fls_key, NULL); // prevent recursion as _mi_thread_done may set it back to the main heap, issue #672 - } - } -#elif defined(MI_USE_PTHREADS) - // use pthread local storage keys to detect thread ending - // (and used with MI_TLS_PTHREADS for the default heap) - pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1); - static void mi_pthread_done(void* value) { - if (value!=NULL) _mi_thread_done((mi_heap_t*)value); - } -#elif defined(__wasi__) -// no pthreads in the WebAssembly Standard Interface -#else - #pragma message("define a way to call mi_thread_done when a thread is done") -#endif - -// Set up handlers so `mi_thread_done` is called automatically -static void mi_process_setup_auto_thread_done(void) { - static bool tls_initialized = false; // fine if it races - if (tls_initialized) return; - tls_initialized = true; - #if defined(_WIN32) && defined(MI_SHARED_LIB) - // nothing to do as it is done in DllMain - #elif defined(_WIN32) && !defined(MI_SHARED_LIB) - mi_fls_key = FlsAlloc(&mi_fls_done); - #elif defined(MI_USE_PTHREADS) - mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1)); - pthread_key_create(&_mi_heap_default_key, &mi_pthread_done); - #endif - _mi_heap_set_default_direct(&_mi_heap_main); -} - - -bool _mi_is_main_thread(void) { - return (_mi_heap_main.thread_id==0 || _mi_heap_main.thread_id == _mi_thread_id()); -} - -static _Atomic(size_t) thread_count = MI_ATOMIC_VAR_INIT(1); - -size_t _mi_current_thread_count(void) { - return mi_atomic_load_relaxed(&thread_count); -} - -// This is called from the `mi_malloc_generic` -void mi_thread_init(void) mi_attr_noexcept -{ - // ensure our process has started already - mi_process_init(); - - // initialize the thread local default heap - // (this will call `_mi_heap_set_default_direct` and thus set the - // fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called) - if (_mi_heap_init()) return; // returns true if already initialized - - _mi_stat_increase(&_mi_stats_main.threads, 1); - mi_atomic_increment_relaxed(&thread_count); - //_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id()); -} - -void mi_thread_done(void) mi_attr_noexcept { - _mi_thread_done(mi_get_default_heap()); -} - -static void _mi_thread_done(mi_heap_t* heap) { - mi_atomic_decrement_relaxed(&thread_count); - _mi_stat_decrease(&_mi_stats_main.threads, 1); - - // check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps... 
- if (heap->thread_id != _mi_thread_id()) return; - - // abandon the thread local heap - if (_mi_heap_done(heap)) return; // returns true if already ran -} - -void _mi_heap_set_default_direct(mi_heap_t* heap) { - mi_assert_internal(heap != NULL); - #if defined(MI_TLS_SLOT) - mi_tls_slot_set(MI_TLS_SLOT,heap); - #elif defined(MI_TLS_PTHREAD_SLOT_OFS) - *mi_tls_pthread_heap_slot() = heap; - #elif defined(MI_TLS_PTHREAD) - // we use _mi_heap_default_key - #else - _mi_heap_default = heap; - #endif - - // ensure the default heap is passed to `_mi_thread_done` - // setting to a non-NULL value also ensures `mi_thread_done` is called. - #if defined(_WIN32) && defined(MI_SHARED_LIB) - // nothing to do as it is done in DllMain - #elif defined(_WIN32) && !defined(MI_SHARED_LIB) - mi_assert_internal(mi_fls_key != 0); - FlsSetValue(mi_fls_key, heap); - #elif defined(MI_USE_PTHREADS) - if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD - pthread_setspecific(_mi_heap_default_key, heap); - } - #endif -} - - -// -------------------------------------------------------- -// Run functions on process init/done, and thread init/done -// -------------------------------------------------------- -static void mi_cdecl mi_process_done(void); - -static bool os_preloading = true; // true until this module is initialized -static bool mi_redirected = false; // true if malloc redirects to mi_malloc - -// Returns true if this module has not been initialized; Don't use C runtime routines until it returns false. -bool _mi_preloading(void) { - return os_preloading; -} - -mi_decl_nodiscard bool mi_is_redirected(void) mi_attr_noexcept { - return mi_redirected; -} - -// Communicate with the redirection module on Windows -#if defined(_WIN32) && defined(MI_SHARED_LIB) && !defined(MI_WIN_NOREDIRECT) -#ifdef __cplusplus -extern "C" { -#endif -mi_decl_export void _mi_redirect_entry(DWORD reason) { - // called on redirection; careful as this may be called before DllMain - if (reason == DLL_PROCESS_ATTACH) { - mi_redirected = true; - } - else if (reason == DLL_PROCESS_DETACH) { - mi_redirected = false; - } - else if (reason == DLL_THREAD_DETACH) { - mi_thread_done(); - } -} -__declspec(dllimport) bool mi_cdecl mi_allocator_init(const char** message); -__declspec(dllimport) void mi_cdecl mi_allocator_done(void); -#ifdef __cplusplus -} -#endif -#else -static bool mi_allocator_init(const char** message) { - if (message != NULL) *message = NULL; - return true; -} -static void mi_allocator_done(void) { - // nothing to do -} -#endif - -// Called once by the process loader -static void mi_process_load(void) { - mi_heap_main_init(); - #if defined(MI_TLS_RECURSE_GUARD) - volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true; - MI_UNUSED(dummy); - #endif - os_preloading = false; - mi_assert_internal(_mi_is_main_thread()); - #if !(defined(_WIN32) && defined(MI_SHARED_LIB)) // use Dll process detach (see below) instead of atexit (issue #521) - atexit(&mi_process_done); - #endif - _mi_options_init(); - mi_process_setup_auto_thread_done(); - mi_process_init(); - if (mi_redirected) _mi_verbose_message("malloc is redirected.\n"); - - // show message from the redirector (if present) - const char* msg = NULL; - mi_allocator_init(&msg); - if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) { - _mi_fputs(NULL,NULL,NULL,msg); - } - - // reseed random - 
_mi_random_reinit_if_weak(&_mi_heap_main.random); -} - -#if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) -#include -mi_decl_cache_align bool _mi_cpu_has_fsrm = false; - -static void mi_detect_cpu_features(void) { - // FSRM for fast rep movsb support (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017)) - int32_t cpu_info[4]; - __cpuid(cpu_info, 7); - _mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see -} -#else -static void mi_detect_cpu_features(void) { - // nothing -} -#endif - -// Initialize the process; called by thread_init or the process loader -void mi_process_init(void) mi_attr_noexcept { - // ensure we are called once - if (_mi_process_is_initialized) return; - _mi_verbose_message("process init: 0x%zx\n", _mi_thread_id()); - _mi_process_is_initialized = true; - mi_process_setup_auto_thread_done(); - - mi_detect_cpu_features(); - _mi_os_init(); - mi_heap_main_init(); - #if (MI_DEBUG) - _mi_verbose_message("debug level : %d\n", MI_DEBUG); - #endif - _mi_verbose_message("secure level: %d\n", MI_SECURE); - _mi_verbose_message("mem tracking: %s\n", MI_TRACK_TOOL); - mi_thread_init(); - - #if defined(_WIN32) && !defined(MI_SHARED_LIB) - // When building as a static lib the FLS cleanup happens to early for the main thread. - // To avoid this, set the FLS value for the main thread to NULL so the fls cleanup - // will not call _mi_thread_done on the (still executing) main thread. See issue #508. - FlsSetValue(mi_fls_key, NULL); - #endif - - mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) - - if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { - size_t pages = mi_option_get_clamp(mi_option_reserve_huge_os_pages, 0, 128*1024); - long reserve_at = mi_option_get(mi_option_reserve_huge_os_pages_at); - if (reserve_at != -1) { - mi_reserve_huge_os_pages_at(pages, reserve_at, pages*500); - } else { - mi_reserve_huge_os_pages_interleave(pages, 0, pages*500); - } - } - if (mi_option_is_enabled(mi_option_reserve_os_memory)) { - long ksize = mi_option_get(mi_option_reserve_os_memory); - if (ksize > 0) { - mi_reserve_os_memory((size_t)ksize*MI_KiB, true /* commit? */, true /* allow large pages? */); - } - } -} - -// Called when the process is done (through `at_exit`) -static void mi_cdecl mi_process_done(void) { - // only shutdown if we were initialized - if (!_mi_process_is_initialized) return; - // ensure we are called once - static bool process_done = false; - if (process_done) return; - process_done = true; - - #if defined(_WIN32) && !defined(MI_SHARED_LIB) - FlsFree(mi_fls_key); // call thread-done on all threads (except the main thread) to prevent dangling callback pointer if statically linked with a DLL; Issue #208 - #endif - - #ifndef MI_SKIP_COLLECT_ON_EXIT - #if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB) - // free all memory if possible on process exit. This is not needed for a stand-alone process - // but should be done if mimalloc is statically linked into another shared library which - // is repeatedly loaded/unloaded, see issue #281. - mi_collect(true /* force */ ); - #endif - #endif - - // Forcefully release all retained memory; this can be dangerous in general if overriding regular malloc/free - // since after process_done there might still be other code running that calls `free` (like at_exit routines, - // or C-runtime termination code. - if (mi_option_is_enabled(mi_option_destroy_on_exit)) { - _mi_heap_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!) 
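Aside on the hunks above: `mi_process_init` acts on `mi_option_reserve_huge_os_pages`, `mi_option_reserve_huge_os_pages_at`, and `mi_option_reserve_os_memory` once at startup, while `mi_process_done` consults `mi_option_destroy_on_exit` only at exit. A hedged sketch of how an application can inspect or adjust these through the public option API, assuming the vendored `mimalloc.h` is on the include path; the exact option set differs between 2.0.9 and 3.2.6.

```
/* Illustration only: reading and setting the options referenced in the
   hunks above through mimalloc's public API. */
#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  /* destroy_on_exit is consulted in mi_process_done, so enabling it at
     runtime still takes effect when the process terminates */
  mi_option_set_enabled(mi_option_destroy_on_exit, true);

  /* the reservation options are acted on during mi_process_init, so they
     are normally set in the environment (e.g. MIMALLOC_RESERVE_HUGE_OS_PAGES=2)
     before startup; here we only read them back */
  printf("reserve_huge_os_pages = %ld\n", mi_option_get(mi_option_reserve_huge_os_pages));
  printf("reserve_os_memory     = %ld\n", mi_option_get(mi_option_reserve_os_memory));

  void* p = mi_malloc(64);
  mi_free(p);
  return 0;
}
```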
- _mi_segment_cache_free_all(&_mi_heap_main_get()->tld->os); // release all cached segments - } - - if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) { - mi_stats_print(NULL); - } - mi_allocator_done(); - _mi_verbose_message("process done: 0x%zx\n", _mi_heap_main.thread_id); - os_preloading = true; // don't call the C runtime anymore -} - - - -#if defined(_WIN32) && defined(MI_SHARED_LIB) - // Windows DLL: easy to hook into process_init and thread_done - __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { - MI_UNUSED(reserved); - MI_UNUSED(inst); - if (reason==DLL_PROCESS_ATTACH) { - mi_process_load(); - } - else if (reason==DLL_PROCESS_DETACH) { - mi_process_done(); - } - else if (reason==DLL_THREAD_DETACH) { - if (!mi_is_redirected()) { - mi_thread_done(); - } - } - return TRUE; - } - -#elif defined(_MSC_VER) - // MSVC: use data section magic for static libraries - // See - static int _mi_process_init(void) { - mi_process_load(); - return 0; - } - typedef int(*_mi_crt_callback_t)(void); - #if defined(_M_X64) || defined(_M_ARM64) - __pragma(comment(linker, "/include:" "_mi_msvc_initu")) - #pragma section(".CRT$XIU", long, read) - #else - __pragma(comment(linker, "/include:" "__mi_msvc_initu")) - #endif - #pragma data_seg(".CRT$XIU") - mi_decl_externc _mi_crt_callback_t _mi_msvc_initu[] = { &_mi_process_init }; - #pragma data_seg() - -#elif defined(__cplusplus) - // C++: use static initialization to detect process start - static bool _mi_process_init(void) { - mi_process_load(); - return (_mi_heap_main.thread_id != 0); - } - static bool mi_initialized = _mi_process_init(); - -#elif defined(__GNUC__) || defined(__clang__) - // GCC,Clang: use the constructor attribute - static void __attribute__((constructor)) _mi_process_init(void) { - mi_process_load(); - } - -#else -#pragma message("define a way to call mi_process_load on your platform") -#endif diff --git a/3rd/mimalloc-2.0.9/src/options.c b/3rd/mimalloc-2.0.9/src/options.c deleted file mode 100644 index 0a82ca65..00000000 --- a/3rd/mimalloc-2.0.9/src/options.c +++ /dev/null @@ -1,642 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" - -#include -#include // strtol -#include // strncpy, strncat, strlen, strstr -#include // toupper -#include - -#ifdef _MSC_VER -#pragma warning(disable:4996) // strncpy, strncat -#endif - - -static long mi_max_error_count = 16; // stop outputting errors after this (use < 0 for no limit) -static long mi_max_warning_count = 16; // stop outputting warnings after this (use < 0 for no limit) - -static void mi_add_stderr_output(void); - -int mi_version(void) mi_attr_noexcept { - return MI_MALLOC_VERSION; -} - -#ifdef _WIN32 -#include -#endif - -// -------------------------------------------------------- -// Options -// These can be accessed by multiple threads and may be -// concurrently initialized, but an initializing data race -// is ok since they resolve to the same value. 
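The closing remark above (an initializing data race is acceptable because every thread resolves to the same value) is a pattern worth spelling out. Below is a standalone sketch of the idiom with C11 atomics, not the deleted code itself; `lazy_option_t`, `read_from_env`, and `lazy_option_get` are invented names, and the plain write to `value` is formally a race that is tolerated for exactly the reason the comment gives.

```
/* Benign-race lazy initialization: several threads may initialize concurrently,
   but they all compute the same value, so whichever write lands is correct. */
#include <stdatomic.h>
#include <stdlib.h>

typedef struct {
  atomic_int initialized;   /* 0 = uninitialized, 1 = initialized */
  long value;               /* plain field: racy writes all store the same value */
} lazy_option_t;

static long read_from_env(const char* name, long dflt) {
  const char* s = getenv(name);
  return (s == NULL ? dflt : strtol(s, NULL, 10));
}

static long lazy_option_get(lazy_option_t* opt, const char* name, long dflt) {
  if (atomic_load_explicit(&opt->initialized, memory_order_acquire) == 0) {
    opt->value = read_from_env(name, dflt);   /* every racer derives the same value */
    atomic_store_explicit(&opt->initialized, 1, memory_order_release);
  }
  return opt->value;
}
```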
-// -------------------------------------------------------- -typedef enum mi_init_e { - UNINIT, // not yet initialized - DEFAULTED, // not found in the environment, use default value - INITIALIZED // found in environment or set explicitly -} mi_init_t; - -typedef struct mi_option_desc_s { - long value; // the value - mi_init_t init; // is it initialized yet? (from the environment) - mi_option_t option; // for debugging: the option index should match the option - const char* name; // option name without `mimalloc_` prefix - const char* legacy_name; // potential legacy v1.x option name -} mi_option_desc_t; - -#define MI_OPTION(opt) mi_option_##opt, #opt, NULL -#define MI_OPTION_LEGACY(opt,legacy) mi_option_##opt, #opt, #legacy - -static mi_option_desc_t options[_mi_option_last] = -{ - // stable options - #if MI_DEBUG || defined(MI_SHOW_ERRORS) - { 1, UNINIT, MI_OPTION(show_errors) }, - #else - { 0, UNINIT, MI_OPTION(show_errors) }, - #endif - { 0, UNINIT, MI_OPTION(show_stats) }, - { 0, UNINIT, MI_OPTION(verbose) }, - - // Some of the following options are experimental and not all combinations are valid. Use with care. - { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) - { 0, UNINIT, MI_OPTION(deprecated_eager_region_commit) }, - { 0, UNINIT, MI_OPTION(deprecated_reset_decommits) }, - { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's - { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages - { -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N - { 0, UNINIT, MI_OPTION(reserve_os_memory) }, - { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free - { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_decommit, abandoned_page_reset) },// decommit free page memory when a thread terminates - { 0, UNINIT, MI_OPTION(deprecated_segment_reset) }, - #if defined(__NetBSD__) - { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - #elif defined(_WIN32) - { 4, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) - #else - { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) - #endif - { 25, UNINIT, MI_OPTION_LEGACY(decommit_delay, reset_delay) }, // page decommit delay in milli-seconds - { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. - { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) - { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output - { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output - { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. 
- { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) - { 500, UNINIT, MI_OPTION(segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments - { 1, UNINIT, MI_OPTION(decommit_extend_delay) }, - { 0, UNINIT, MI_OPTION(destroy_on_exit)} // release all OS memory on process exit; careful with dangling pointer or after-exit frees! -}; - -static void mi_option_init(mi_option_desc_t* desc); - -void _mi_options_init(void) { - // called on process load; should not be called before the CRT is initialized! - // (e.g. do not call this from process_init as that may run before CRT initialization) - mi_add_stderr_output(); // now it safe to use stderr for output - for(int i = 0; i < _mi_option_last; i++ ) { - mi_option_t option = (mi_option_t)i; - long l = mi_option_get(option); MI_UNUSED(l); // initialize - // if (option != mi_option_verbose) - { - mi_option_desc_t* desc = &options[option]; - _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); - } - } - mi_max_error_count = mi_option_get(mi_option_max_errors); - mi_max_warning_count = mi_option_get(mi_option_max_warnings); -} - -mi_decl_nodiscard long mi_option_get(mi_option_t option) { - mi_assert(option >= 0 && option < _mi_option_last); - if (option < 0 || option >= _mi_option_last) return 0; - mi_option_desc_t* desc = &options[option]; - mi_assert(desc->option == option); // index should match the option - if mi_unlikely(desc->init == UNINIT) { - mi_option_init(desc); - } - return desc->value; -} - -mi_decl_nodiscard long mi_option_get_clamp(mi_option_t option, long min, long max) { - long x = mi_option_get(option); - return (x < min ? min : (x > max ? max : x)); -} - -void mi_option_set(mi_option_t option, long value) { - mi_assert(option >= 0 && option < _mi_option_last); - if (option < 0 || option >= _mi_option_last) return; - mi_option_desc_t* desc = &options[option]; - mi_assert(desc->option == option); // index should match the option - desc->value = value; - desc->init = INITIALIZED; -} - -void mi_option_set_default(mi_option_t option, long value) { - mi_assert(option >= 0 && option < _mi_option_last); - if (option < 0 || option >= _mi_option_last) return; - mi_option_desc_t* desc = &options[option]; - if (desc->init != INITIALIZED) { - desc->value = value; - } -} - -mi_decl_nodiscard bool mi_option_is_enabled(mi_option_t option) { - return (mi_option_get(option) != 0); -} - -void mi_option_set_enabled(mi_option_t option, bool enable) { - mi_option_set(option, (enable ? 1 : 0)); -} - -void mi_option_set_enabled_default(mi_option_t option, bool enable) { - mi_option_set_default(option, (enable ? 1 : 0)); -} - -void mi_option_enable(mi_option_t option) { - mi_option_set_enabled(option,true); -} - -void mi_option_disable(mi_option_t option) { - mi_option_set_enabled(option,false); -} - - -static void mi_cdecl mi_out_stderr(const char* msg, void* arg) { - MI_UNUSED(arg); - if (msg == NULL) return; - #ifdef _WIN32 - // on windows with redirection, the C runtime cannot handle locale dependent output - // after the main thread closes so we use direct console output. 
- if (!_mi_preloading()) { - // _cputs(msg); // _cputs cannot be used at is aborts if it fails to lock the console - static HANDLE hcon = INVALID_HANDLE_VALUE; - static bool hconIsConsole; - if (hcon == INVALID_HANDLE_VALUE) { - CONSOLE_SCREEN_BUFFER_INFO sbi; - hcon = GetStdHandle(STD_ERROR_HANDLE); - hconIsConsole = ((hcon != INVALID_HANDLE_VALUE) && GetConsoleScreenBufferInfo(hcon, &sbi)); - } - const size_t len = strlen(msg); - if (len > 0 && len < UINT32_MAX) { - DWORD written = 0; - if (hconIsConsole) { - WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL); - } - else if (hcon != INVALID_HANDLE_VALUE) { - // use direct write if stderr was redirected - WriteFile(hcon, msg, (DWORD)len, &written, NULL); - } - else { - // finally fall back to fputs after all - fputs(msg, stderr); - } - } - } - #else - fputs(msg, stderr); - #endif -} - -// Since an output function can be registered earliest in the `main` -// function we also buffer output that happens earlier. When -// an output function is registered it is called immediately with -// the output up to that point. -#ifndef MI_MAX_DELAY_OUTPUT -#define MI_MAX_DELAY_OUTPUT ((size_t)(32*1024)) -#endif -static char out_buf[MI_MAX_DELAY_OUTPUT+1]; -static _Atomic(size_t) out_len; - -static void mi_cdecl mi_out_buf(const char* msg, void* arg) { - MI_UNUSED(arg); - if (msg==NULL) return; - if (mi_atomic_load_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; - size_t n = strlen(msg); - if (n==0) return; - // claim space - size_t start = mi_atomic_add_acq_rel(&out_len, n); - if (start >= MI_MAX_DELAY_OUTPUT) return; - // check bound - if (start+n >= MI_MAX_DELAY_OUTPUT) { - n = MI_MAX_DELAY_OUTPUT-start-1; - } - _mi_memcpy(&out_buf[start], msg, n); -} - -static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf, void* arg) { - if (out==NULL) return; - // claim (if `no_more_buf == true`, no more output will be added after this point) - size_t count = mi_atomic_add_acq_rel(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1)); - // and output the current contents - if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT; - out_buf[count] = 0; - out(out_buf,arg); - if (!no_more_buf) { - out_buf[count] = '\n'; // if continue with the buffer, insert a newline - } -} - - -// Once this module is loaded, switch to this routine -// which outputs to stderr and the delayed output buffer. -static void mi_cdecl mi_out_buf_stderr(const char* msg, void* arg) { - mi_out_stderr(msg,arg); - mi_out_buf(msg,arg); -} - - - -// -------------------------------------------------------- -// Default output handler -// -------------------------------------------------------- - -// Should be atomic but gives errors on many platforms as generally we cannot cast a function pointer to a uintptr_t. -// For now, don't register output from multiple threads. -static mi_output_fun* volatile mi_out_default; // = NULL -static _Atomic(void*) mi_out_arg; // = NULL - -static mi_output_fun* mi_out_get_default(void** parg) { - if (parg != NULL) { *parg = mi_atomic_load_ptr_acquire(void,&mi_out_arg); } - mi_output_fun* out = mi_out_default; - return (out == NULL ? &mi_out_buf : out); -} - -void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept { - mi_out_default = (out == NULL ? 
&mi_out_stderr : out); // stop using the delayed output buffer - mi_atomic_store_ptr_release(void,&mi_out_arg, arg); - if (out!=NULL) mi_out_buf_flush(out,true,arg); // output all the delayed output now -} - -// add stderr to the delayed output after the module is loaded -static void mi_add_stderr_output() { - mi_assert_internal(mi_out_default == NULL); - mi_out_buf_flush(&mi_out_stderr, false, NULL); // flush current contents to stderr - mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output -} - -// -------------------------------------------------------- -// Messages, all end up calling `_mi_fputs`. -// -------------------------------------------------------- -static _Atomic(size_t) error_count; // = 0; // when >= max_error_count stop emitting errors -static _Atomic(size_t) warning_count; // = 0; // when >= max_warning_count stop emitting warnings - -// When overriding malloc, we may recurse into mi_vfprintf if an allocation -// inside the C runtime causes another message. -// In some cases (like on macOS) the loader already allocates which -// calls into mimalloc; if we then access thread locals (like `recurse`) -// this may crash as the access may call _tlv_bootstrap that tries to -// (recursively) invoke malloc again to allocate space for the thread local -// variables on demand. This is why we use a _mi_preloading test on such -// platforms. However, C code generator may move the initial thread local address -// load before the `if` and we therefore split it out in a separate funcion. -static mi_decl_thread bool recurse = false; - -static mi_decl_noinline bool mi_recurse_enter_prim(void) { - if (recurse) return false; - recurse = true; - return true; -} - -static mi_decl_noinline void mi_recurse_exit_prim(void) { - recurse = false; -} - -static bool mi_recurse_enter(void) { - #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) - if (_mi_preloading()) return true; - #endif - return mi_recurse_enter_prim(); -} - -static void mi_recurse_exit(void) { - #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) - if (_mi_preloading()) return; - #endif - mi_recurse_exit_prim(); -} - -void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message) { - if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) { // TODO: use mi_out_stderr for stderr? - if (!mi_recurse_enter()) return; - out = mi_out_get_default(&arg); - if (prefix != NULL) out(prefix, arg); - out(message, arg); - mi_recurse_exit(); - } - else { - if (prefix != NULL) out(prefix, arg); - out(message, arg); - } -} - -// Define our own limited `fprintf` that avoids memory allocation. -// We do this using `snprintf` with a limited buffer. -static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) { - char buf[512]; - if (fmt==NULL) return; - if (!mi_recurse_enter()) return; - vsnprintf(buf,sizeof(buf)-1,fmt,args); - mi_recurse_exit(); - _mi_fputs(out,arg,prefix,buf); -} - -void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... 
) { - va_list args; - va_start(args,fmt); - mi_vfprintf(out,arg,NULL,fmt,args); - va_end(args); -} - -static void mi_vfprintf_thread(mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args) { - if (prefix != NULL && strlen(prefix) <= 32 && !_mi_is_main_thread()) { - char tprefix[64]; - snprintf(tprefix, sizeof(tprefix), "%sthread 0x%llx: ", prefix, (unsigned long long)_mi_thread_id()); - mi_vfprintf(out, arg, tprefix, fmt, args); - } - else { - mi_vfprintf(out, arg, prefix, fmt, args); - } -} - -void _mi_trace_message(const char* fmt, ...) { - if (mi_option_get(mi_option_verbose) <= 1) return; // only with verbose level 2 or higher - va_list args; - va_start(args, fmt); - mi_vfprintf_thread(NULL, NULL, "mimalloc: ", fmt, args); - va_end(args); -} - -void _mi_verbose_message(const char* fmt, ...) { - if (!mi_option_is_enabled(mi_option_verbose)) return; - va_list args; - va_start(args,fmt); - mi_vfprintf(NULL, NULL, "mimalloc: ", fmt, args); - va_end(args); -} - -static void mi_show_error_message(const char* fmt, va_list args) { - if (!mi_option_is_enabled(mi_option_verbose)) { - if (!mi_option_is_enabled(mi_option_show_errors)) return; - if (mi_max_error_count >= 0 && (long)mi_atomic_increment_acq_rel(&error_count) > mi_max_error_count) return; - } - mi_vfprintf_thread(NULL, NULL, "mimalloc: error: ", fmt, args); -} - -void _mi_warning_message(const char* fmt, ...) { - if (!mi_option_is_enabled(mi_option_verbose)) { - if (!mi_option_is_enabled(mi_option_show_errors)) return; - if (mi_max_warning_count >= 0 && (long)mi_atomic_increment_acq_rel(&warning_count) > mi_max_warning_count) return; - } - va_list args; - va_start(args,fmt); - mi_vfprintf_thread(NULL, NULL, "mimalloc: warning: ", fmt, args); - va_end(args); -} - - -#if MI_DEBUG -void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) { - _mi_fprintf(NULL, NULL, "mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); - abort(); -} -#endif - -// -------------------------------------------------------- -// Errors -// -------------------------------------------------------- - -static mi_error_fun* volatile mi_error_handler; // = NULL -static _Atomic(void*) mi_error_arg; // = NULL - -static void mi_error_default(int err) { - MI_UNUSED(err); -#if (MI_DEBUG>0) - if (err==EFAULT) { - #ifdef _MSC_VER - __debugbreak(); - #endif - abort(); - } -#endif -#if (MI_SECURE>0) - if (err==EFAULT) { // abort on serious errors in secure mode (corrupted meta-data) - abort(); - } -#endif -#if defined(MI_XMALLOC) - if (err==ENOMEM || err==EOVERFLOW) { // abort on memory allocation fails in xmalloc mode - abort(); - } -#endif -} - -void mi_register_error(mi_error_fun* fun, void* arg) { - mi_error_handler = fun; // can be NULL - mi_atomic_store_ptr_release(void,&mi_error_arg, arg); -} - -void _mi_error_message(int err, const char* fmt, ...) 
{ - // show detailed error message - va_list args; - va_start(args, fmt); - mi_show_error_message(fmt, args); - va_end(args); - // and call the error handler which may abort (or return normally) - if (mi_error_handler != NULL) { - mi_error_handler(err, mi_atomic_load_ptr_acquire(void,&mi_error_arg)); - } - else { - mi_error_default(err); - } -} - -// -------------------------------------------------------- -// Initialize options by checking the environment -// -------------------------------------------------------- - -static void mi_strlcpy(char* dest, const char* src, size_t dest_size) { - if (dest==NULL || src==NULL || dest_size == 0) return; - // copy until end of src, or when dest is (almost) full - while (*src != 0 && dest_size > 1) { - *dest++ = *src++; - dest_size--; - } - // always zero terminate - *dest = 0; -} - -static void mi_strlcat(char* dest, const char* src, size_t dest_size) { - if (dest==NULL || src==NULL || dest_size == 0) return; - // find end of string in the dest buffer - while (*dest != 0 && dest_size > 1) { - dest++; - dest_size--; - } - // and catenate - mi_strlcpy(dest, src, dest_size); -} - -#ifdef MI_NO_GETENV -static bool mi_getenv(const char* name, char* result, size_t result_size) { - MI_UNUSED(name); - MI_UNUSED(result); - MI_UNUSED(result_size); - return false; -} -#else -#if defined _WIN32 -// On Windows use GetEnvironmentVariable instead of getenv to work -// reliably even when this is invoked before the C runtime is initialized. -// i.e. when `_mi_preloading() == true`. -// Note: on windows, environment names are not case sensitive. -#include -static bool mi_getenv(const char* name, char* result, size_t result_size) { - result[0] = 0; - size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); - return (len > 0 && len < result_size); -} -#elif !defined(MI_USE_ENVIRON) || (MI_USE_ENVIRON!=0) -// On Posix systemsr use `environ` to acces environment variables -// even before the C runtime is initialized. -#if defined(__APPLE__) && defined(__has_include) && __has_include() -#include -static char** mi_get_environ(void) { - return (*_NSGetEnviron()); -} -#else -extern char** environ; -static char** mi_get_environ(void) { - return environ; -} -#endif -static int mi_strnicmp(const char* s, const char* t, size_t n) { - if (n == 0) return 0; - for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) { - if (toupper(*s) != toupper(*t)) break; - } - return (n == 0 ? 0 : *s - *t); -} -static bool mi_getenv(const char* name, char* result, size_t result_size) { - if (name==NULL) return false; - const size_t len = strlen(name); - if (len == 0) return false; - char** env = mi_get_environ(); - if (env == NULL) return false; - // compare up to 256 entries - for (int i = 0; i < 256 && env[i] != NULL; i++) { - const char* s = env[i]; - if (mi_strnicmp(name, s, len) == 0 && s[len] == '=') { // case insensitive - // found it - mi_strlcpy(result, s + len + 1, result_size); - return true; - } - } - return false; -} -#else -// fallback: use standard C `getenv` but this cannot be used while initializing the C runtime -static bool mi_getenv(const char* name, char* result, size_t result_size) { - // cannot call getenv() when still initializing the C runtime. - if (_mi_preloading()) return false; - const char* s = getenv(name); - if (s == NULL) { - // we check the upper case name too. 
- char buf[64+1]; - size_t len = strlen(name); - if (len >= sizeof(buf)) len = sizeof(buf) - 1; - for (size_t i = 0; i < len; i++) { - buf[i] = toupper(name[i]); - } - buf[len] = 0; - s = getenv(buf); - } - if (s != NULL && strlen(s) < result_size) { - mi_strlcpy(result, s, result_size); - return true; - } - else { - return false; - } -} -#endif // !MI_USE_ENVIRON -#endif // !MI_NO_GETENV - -static void mi_option_init(mi_option_desc_t* desc) { - // Read option value from the environment - char s[64+1]; - char buf[64+1]; - mi_strlcpy(buf, "mimalloc_", sizeof(buf)); - mi_strlcat(buf, desc->name, sizeof(buf)); - bool found = mi_getenv(buf,s,sizeof(s)); - if (!found && desc->legacy_name != NULL) { - mi_strlcpy(buf, "mimalloc_", sizeof(buf)); - mi_strlcat(buf, desc->legacy_name, sizeof(buf)); - found = mi_getenv(buf,s,sizeof(s)); - if (found) { - _mi_warning_message("environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n", desc->legacy_name, desc->name ); - } - } - - if (found) { - size_t len = strlen(s); - if (len >= sizeof(buf)) len = sizeof(buf) - 1; - for (size_t i = 0; i < len; i++) { - buf[i] = (char)toupper(s[i]); - } - buf[len] = 0; - if (buf[0]==0 || strstr("1;TRUE;YES;ON", buf) != NULL) { - desc->value = 1; - desc->init = INITIALIZED; - } - else if (strstr("0;FALSE;NO;OFF", buf) != NULL) { - desc->value = 0; - desc->init = INITIALIZED; - } - else { - char* end = buf; - long value = strtol(buf, &end, 10); - if (desc->option == mi_option_reserve_os_memory) { - // this option is interpreted in KiB to prevent overflow of `long` - if (*end == 'K') { end++; } - else if (*end == 'M') { value *= MI_KiB; end++; } - else if (*end == 'G') { value *= MI_MiB; end++; } - else { value = (value + MI_KiB - 1) / MI_KiB; } - if (end[0] == 'I' && end[1] == 'B') { end += 2; } - else if (*end == 'B') { end++; } - } - if (*end == 0) { - desc->value = value; - desc->init = INITIALIZED; - } - else { - // set `init` first to avoid recursion through _mi_warning_message on mimalloc_verbose. - desc->init = DEFAULTED; - if (desc->option == mi_option_verbose && desc->value == 0) { - // if the 'mimalloc_verbose' env var has a bogus value we'd never know - // (since the value defaults to 'off') so in that case briefly enable verbose - desc->value = 1; - _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name ); - desc->value = 0; - } - else { - _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name ); - } - } - } - mi_assert_internal(desc->init != UNINIT); - } - else if (!_mi_preloading()) { - desc->init = DEFAULTED; - } -} diff --git a/3rd/mimalloc-2.0.9/src/os.c b/3rd/mimalloc-2.0.9/src/os.c deleted file mode 100644 index 0f984741..00000000 --- a/3rd/mimalloc-2.0.9/src/os.c +++ /dev/null @@ -1,1479 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ -#ifndef _DEFAULT_SOURCE -#define _DEFAULT_SOURCE // ensure mmap flags are defined -#endif - -#if defined(__sun) -// illumos provides new mman.h api when any of these are defined -// otherwise the old api based on caddr_t which predates the void pointers one. 
-// stock solaris provides only the former, chose to atomically to discard those -// flags only here rather than project wide tough. -#undef _XOPEN_SOURCE -#undef _POSIX_C_SOURCE -#endif -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" - -#include // strerror - -#ifdef _MSC_VER -#pragma warning(disable:4996) // strerror -#endif - -#if defined(__wasi__) -#define MI_USE_SBRK -#endif - -#if defined(_WIN32) -#include -#elif defined(__wasi__) -#include // sbrk -#else -#include // mmap -#include // sysconf -#if defined(__linux__) -#include -#include -#if defined(__GLIBC__) -#include // linux mmap flags -#else -#include -#endif -#endif -#if defined(__APPLE__) -#include -#if !TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR -#include -#endif -#endif -#if defined(__FreeBSD__) || defined(__DragonFly__) -#include -#if __FreeBSD_version >= 1200000 -#include -#include -#endif -#include -#endif -#endif - -/* ----------------------------------------------------------- - Initialization. - On windows initializes support for aligned allocation and - large OS pages (if MIMALLOC_LARGE_OS_PAGES is true). ------------------------------------------------------------ */ -bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); -bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats); - -static void* mi_align_up_ptr(void* p, size_t alignment) { - return (void*)_mi_align_up((uintptr_t)p, alignment); -} - -static void* mi_align_down_ptr(void* p, size_t alignment) { - return (void*)_mi_align_down((uintptr_t)p, alignment); -} - - -// page size (initialized properly in `os_init`) -static size_t os_page_size = 4096; - -// minimal allocation granularity -static size_t os_alloc_granularity = 4096; - -// if non-zero, use large page allocation -static size_t large_os_page_size = 0; - -// is memory overcommit allowed? -// set dynamically in _mi_os_init (and if true we use MAP_NORESERVE) -static bool os_overcommit = true; - -bool _mi_os_has_overcommit(void) { - return os_overcommit; -} - -// OS (small) page size -size_t _mi_os_page_size(void) { - return os_page_size; -} - -// if large OS pages are supported (2 or 4MiB), then return the size, otherwise return the small page size (4KiB) -size_t _mi_os_large_page_size(void) { - return (large_os_page_size != 0 ? large_os_page_size : _mi_os_page_size()); -} - -#if !defined(MI_USE_SBRK) && !defined(__wasi__) -static bool use_large_os_page(size_t size, size_t alignment) { - // if we have access, check the size and alignment requirements - if (large_os_page_size == 0 || !mi_option_is_enabled(mi_option_large_os_pages)) return false; - return ((size % large_os_page_size) == 0 && (alignment % large_os_page_size) == 0); -} -#endif - -// round to a good OS allocation size (bounded by max 12.5% waste) -size_t _mi_os_good_alloc_size(size_t size) { - size_t align_size; - if (size < 512*MI_KiB) align_size = _mi_os_page_size(); - else if (size < 2*MI_MiB) align_size = 64*MI_KiB; - else if (size < 8*MI_MiB) align_size = 256*MI_KiB; - else if (size < 32*MI_MiB) align_size = 1*MI_MiB; - else align_size = 4*MI_MiB; - if mi_unlikely(size >= (SIZE_MAX - align_size)) return size; // possible overflow? - return _mi_align_up(size, align_size); -} - -#if defined(_WIN32) -// We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016. -// So, we need to look it up dynamically to run on older systems. 
(use __stdcall for 32-bit compatibility) -// NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) -// We define a minimal MEM_EXTENDED_PARAMETER ourselves in order to be able to compile with older SDK's. -typedef enum MI_MEM_EXTENDED_PARAMETER_TYPE_E { - MiMemExtendedParameterInvalidType = 0, - MiMemExtendedParameterAddressRequirements, - MiMemExtendedParameterNumaNode, - MiMemExtendedParameterPartitionHandle, - MiMemExtendedParameterUserPhysicalHandle, - MiMemExtendedParameterAttributeFlags, - MiMemExtendedParameterMax -} MI_MEM_EXTENDED_PARAMETER_TYPE; - -typedef struct DECLSPEC_ALIGN(8) MI_MEM_EXTENDED_PARAMETER_S { - struct { DWORD64 Type : 8; DWORD64 Reserved : 56; } Type; - union { DWORD64 ULong64; PVOID Pointer; SIZE_T Size; HANDLE Handle; DWORD ULong; } Arg; -} MI_MEM_EXTENDED_PARAMETER; - -typedef struct MI_MEM_ADDRESS_REQUIREMENTS_S { - PVOID LowestStartingAddress; - PVOID HighestEndingAddress; - SIZE_T Alignment; -} MI_MEM_ADDRESS_REQUIREMENTS; - -#define MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE 0x00000010 - -#include -typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); -typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); -static PVirtualAlloc2 pVirtualAlloc2 = NULL; -static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; - -// Similarly, GetNumaProcesorNodeEx is only supported since Windows 7 -typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } MI_PROCESSOR_NUMBER; - -typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber); -typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber); -typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask); -typedef BOOL (__stdcall *PGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber); -static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL; -static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL; -static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL; -static PGetNumaProcessorNode pGetNumaProcessorNode = NULL; - -static bool mi_win_enable_large_os_pages(void) -{ - if (large_os_page_size > 0) return true; - - // Try to see if large OS pages are supported - // To use large pages on Windows, we first need access permission - // Set "Lock pages in memory" permission in the group policy editor - // - unsigned long err = 0; - HANDLE token = NULL; - BOOL ok = OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); - if (ok) { - TOKEN_PRIVILEGES tp; - ok = LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid); - if (ok) { - tp.PrivilegeCount = 1; - tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - ok = AdjustTokenPrivileges(token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0); - if (ok) { - err = GetLastError(); - ok = (err == ERROR_SUCCESS); - if (ok) { - large_os_page_size = GetLargePageMinimum(); - } - } - } - CloseHandle(token); - } - if (!ok) { - if (err == 0) err = GetLastError(); - _mi_warning_message("cannot enable large OS page support, error %lu\n", err); - } - return (ok!=0); -} - -void _mi_os_init(void) -{ - os_overcommit = false; - // get the page size - SYSTEM_INFO si; - GetSystemInfo(&si); - if (si.dwPageSize > 0) os_page_size = si.dwPageSize; - if (si.dwAllocationGranularity > 0) os_alloc_granularity = 
si.dwAllocationGranularity; - // get the VirtualAlloc2 function - HINSTANCE hDll; - hDll = LoadLibrary(TEXT("kernelbase.dll")); - if (hDll != NULL) { - // use VirtualAlloc2FromApp if possible as it is available to Windows store apps - pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); - if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); - FreeLibrary(hDll); - } - // NtAllocateVirtualMemoryEx is used for huge page allocation - hDll = LoadLibrary(TEXT("ntdll.dll")); - if (hDll != NULL) { - pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx"); - FreeLibrary(hDll); - } - // Try to use Win7+ numa API - hDll = LoadLibrary(TEXT("kernel32.dll")); - if (hDll != NULL) { - pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx"); - pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx"); - pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx"); - pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode"); - FreeLibrary(hDll); - } - if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { - mi_win_enable_large_os_pages(); - } -} -#elif defined(__wasi__) -void _mi_os_init(void) { - os_overcommit = false; - os_page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB - os_alloc_granularity = 16; -} - -#else // generic unix - -static void os_detect_overcommit(void) { -#if defined(__linux__) - int fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); - if (fd < 0) return; - char buf[32]; - ssize_t nread = read(fd, &buf, sizeof(buf)); - close(fd); - // - // 0: heuristic overcommit, 1: always overcommit, 2: never overcommit (ignore NORESERVE) - if (nread >= 1) { - os_overcommit = (buf[0] == '0' || buf[0] == '1'); - } -#elif defined(__FreeBSD__) - int val = 0; - size_t olen = sizeof(val); - if (sysctlbyname("vm.overcommit", &val, &olen, NULL, 0) == 0) { - os_overcommit = (val != 0); - } -#else - // default: overcommit is true -#endif -} - -void _mi_os_init(void) { - // get the page size - long result = sysconf(_SC_PAGESIZE); - if (result > 0) { - os_page_size = (size_t)result; - os_alloc_granularity = os_page_size; - } - large_os_page_size = 2*MI_MiB; // TODO: can we query the OS for this? - os_detect_overcommit(); -} -#endif - - -#if defined(MADV_NORMAL) -static int mi_madvise(void* addr, size_t length, int advice) { - #if defined(__sun) - return madvise((caddr_t)addr, length, advice); // Solaris needs cast (issue #520) - #else - return madvise(addr, length, advice); - #endif -} -#endif - - -/* ----------------------------------------------------------- - aligned hinting --------------------------------------------------------------- */ - -// On 64-bit systems, we can do efficient aligned allocation by using -// the 2TiB to 30TiB area to allocate those. -#if (MI_INTPTR_SIZE >= 8) -static mi_decl_cache_align _Atomic(uintptr_t)aligned_base; - -// Return a MI_SEGMENT_SIZE aligned address that is probably available. -// If this returns NULL, the OS will determine the address but on some OS's that may not be -// properly aligned which can be more costly as it needs to be adjusted afterwards. 
-// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization; -// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses -// in the middle of the 2TiB - 6TiB address range (see issue #372)) - -#define MI_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start -#define MI_HINT_AREA ((uintptr_t)4 << 40) // upto 6TiB (since before win8 there is "only" 8TiB available to processes) -#define MI_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages) - -static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) -{ - if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL; - size = _mi_align_up(size, MI_SEGMENT_SIZE); - if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096. - #if (MI_SECURE>0) - size += MI_SEGMENT_SIZE; // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas. - #endif - - uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size); - if (hint == 0 || hint > MI_HINT_MAX) { // wrap or initialize - uintptr_t init = MI_HINT_BASE; - #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode - uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); - init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB - #endif - uintptr_t expected = hint + size; - mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init); - hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all - } - if (hint%try_alignment != 0) return NULL; - return (void*)hint; -} -#else -static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { - MI_UNUSED(try_alignment); MI_UNUSED(size); - return NULL; -} -#endif - -/* ----------------------------------------------------------- - Free memory --------------------------------------------------------------- */ - -static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* stats) -{ - if (addr == NULL || size == 0) return true; // || _mi_os_is_huge_reserved(addr) - bool err = false; -#if defined(_WIN32) - DWORD errcode = 0; - err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); - if (err) { errcode = GetLastError(); } - if (errcode == ERROR_INVALID_ADDRESS) { - // In mi_os_mem_alloc_aligned the fallback path may have returned a pointer inside - // the memory region returned by VirtualAlloc; in that case we need to free using - // the start of the region. 
- MEMORY_BASIC_INFORMATION info = { 0 }; - VirtualQuery(addr, &info, sizeof(info)); - if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < (ptrdiff_t)MI_SEGMENT_SIZE) { - errcode = 0; - err = (VirtualFree(info.AllocationBase, 0, MEM_RELEASE) == 0); - if (err) { errcode = GetLastError(); } - } - } - if (errcode != 0) { - _mi_warning_message("unable to release OS memory: error code 0x%x, addr: %p, size: %zu\n", errcode, addr, size); - } -#elif defined(MI_USE_SBRK) || defined(__wasi__) - err = false; // sbrk heap cannot be shrunk -#else - err = (munmap(addr, size) == -1); - if (err) { - _mi_warning_message("unable to release OS memory: %s, addr: %p, size: %zu\n", strerror(errno), addr, size); - } -#endif - if (was_committed) { _mi_stat_decrease(&stats->committed, size); } - _mi_stat_decrease(&stats->reserved, size); - return !err; -} - - -/* ----------------------------------------------------------- - Raw allocation on Windows (VirtualAlloc) --------------------------------------------------------------- */ - -#ifdef _WIN32 - -#define MEM_COMMIT_RESERVE (MEM_COMMIT|MEM_RESERVE) - -static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) { -#if (MI_INTPTR_SIZE >= 8) - // on 64-bit systems, try to use the virtual address area after 2TiB for 4MiB aligned allocations - if (addr == NULL) { - void* hint = mi_os_get_aligned_hint(try_alignment,size); - if (hint != NULL) { - void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE); - if (p != NULL) return p; - _mi_verbose_message("warning: unable to allocate hinted aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), hint, try_alignment, flags); - // fall through on error - } - } -#endif - // on modern Windows try use VirtualAlloc2 for aligned allocation - if (try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) { - MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 }; - reqs.Alignment = try_alignment; - MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} }; - param.Type.Type = MiMemExtendedParameterAddressRequirements; - param.Arg.Pointer = &reqs; - void* p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, ¶m, 1); - if (p != NULL) return p; - _mi_warning_message("unable to allocate aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), addr, try_alignment, flags); - // fall through on error - } - // last resort - return VirtualAlloc(addr, size, flags, PAGE_READWRITE); -} - -static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) { - mi_assert_internal(!(large_only && !allow_large)); - static _Atomic(size_t) large_page_try_ok; // = 0; - void* p = NULL; - // Try to allocate large OS pages (2MiB) if allowed or required. - if ((large_only || use_large_os_page(size, try_alignment)) - && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { - size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); - if (!large_only && try_ok > 0) { - // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. - // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. - mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); - } - else { - // large OS pages must always reserve and commit. 
- *is_large = true; - p = mi_win_virtual_allocx(addr, size, try_alignment, flags | MEM_LARGE_PAGES); - if (large_only) return p; - // fall back to non-large page allocation on error (`p == NULL`). - if (p == NULL) { - mi_atomic_store_release(&large_page_try_ok,10UL); // on error, don't try again for the next N allocations - } - } - } - // Fall back to regular page allocation - if (p == NULL) { - *is_large = ((flags&MEM_LARGE_PAGES) != 0); - p = mi_win_virtual_allocx(addr, size, try_alignment, flags); - } - if (p == NULL) { - _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large); - } - return p; -} - -/* ----------------------------------------------------------- - Raw allocation using `sbrk` or `wasm_memory_grow` --------------------------------------------------------------- */ - -#elif defined(MI_USE_SBRK) || defined(__wasi__) -#if defined(MI_USE_SBRK) - static void* mi_memory_grow( size_t size ) { - void* p = sbrk(size); - if (p == (void*)(-1)) return NULL; - #if !defined(__wasi__) // on wasi this is always zero initialized already (?) - memset(p,0,size); - #endif - return p; - } -#elif defined(__wasi__) - static void* mi_memory_grow( size_t size ) { - size_t base = (size > 0 ? __builtin_wasm_memory_grow(0,_mi_divide_up(size, _mi_os_page_size())) - : __builtin_wasm_memory_size(0)); - if (base == SIZE_MAX) return NULL; - return (void*)(base * _mi_os_page_size()); - } -#endif - -#if defined(MI_USE_PTHREADS) -static pthread_mutex_t mi_heap_grow_mutex = PTHREAD_MUTEX_INITIALIZER; -#endif - -static void* mi_heap_grow(size_t size, size_t try_alignment) { - void* p = NULL; - if (try_alignment <= 1) { - // `sbrk` is not thread safe in general so try to protect it (we could skip this on WASM but leave it in for now) - #if defined(MI_USE_PTHREADS) - pthread_mutex_lock(&mi_heap_grow_mutex); - #endif - p = mi_memory_grow(size); - #if defined(MI_USE_PTHREADS) - pthread_mutex_unlock(&mi_heap_grow_mutex); - #endif - } - else { - void* base = NULL; - size_t alloc_size = 0; - // to allocate aligned use a lock to try to avoid thread interaction - // between getting the current size and actual allocation - // (also, `sbrk` is not thread safe in general) - #if defined(MI_USE_PTHREADS) - pthread_mutex_lock(&mi_heap_grow_mutex); - #endif - { - void* current = mi_memory_grow(0); // get current size - if (current != NULL) { - void* aligned_current = mi_align_up_ptr(current, try_alignment); // and align from there to minimize wasted space - alloc_size = _mi_align_up( ((uint8_t*)aligned_current - (uint8_t*)current) + size, _mi_os_page_size()); - base = mi_memory_grow(alloc_size); - } - } - #if defined(MI_USE_PTHREADS) - pthread_mutex_unlock(&mi_heap_grow_mutex); - #endif - if (base != NULL) { - p = mi_align_up_ptr(base, try_alignment); - if ((uint8_t*)p + size > (uint8_t*)base + alloc_size) { - // another thread used wasm_memory_grow/sbrk in-between and we do not have enough - // space after alignment. 
Give up (and waste the space as we cannot shrink :-( ) - // (in `mi_os_mem_alloc_aligned` this will fall back to overallocation to align) - p = NULL; - } - } - } - if (p == NULL) { - _mi_warning_message("unable to allocate sbrk/wasm_memory_grow OS memory (%zu bytes, %zu alignment)\n", size, try_alignment); - errno = ENOMEM; - return NULL; - } - mi_assert_internal( try_alignment == 0 || (uintptr_t)p % try_alignment == 0 ); - return p; -} - -/* ----------------------------------------------------------- - Raw allocation on Unix's (mmap) --------------------------------------------------------------- */ -#else -#define MI_OS_USE_MMAP -static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { - MI_UNUSED(try_alignment); - #if defined(MAP_ALIGNED) // BSD - if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { - size_t n = mi_bsr(try_alignment); - if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB - flags |= MAP_ALIGNED(n); - void* p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); - if (p!=MAP_FAILED) return p; - // fall back to regular mmap - } - } - #elif defined(MAP_ALIGN) // Solaris - if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { - void* p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0); // addr parameter is the required alignment - if (p!=MAP_FAILED) return p; - // fall back to regular mmap - } - #endif - #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) - // on 64-bit systems, use the virtual address area after 2TiB for 4MiB aligned allocations - if (addr == NULL) { - void* hint = mi_os_get_aligned_hint(try_alignment, size); - if (hint != NULL) { - void* p = mmap(hint, size, protect_flags, flags, fd, 0); - if (p!=MAP_FAILED) return p; - // fall back to regular mmap - } - } - #endif - // regular mmap - void* p = mmap(addr, size, protect_flags, flags, fd, 0); - if (p!=MAP_FAILED) return p; - // failed to allocate - return NULL; -} - -static int mi_unix_mmap_fd(void) { -#if defined(VM_MAKE_TAG) - // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) - int os_tag = (int)mi_option_get(mi_option_os_tag); - if (os_tag < 100 || os_tag > 255) os_tag = 100; - return VM_MAKE_TAG(os_tag); -#else - return -1; -#endif -} - -static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { - void* p = NULL; - #if !defined(MAP_ANONYMOUS) - #define MAP_ANONYMOUS MAP_ANON - #endif - #if !defined(MAP_NORESERVE) - #define MAP_NORESERVE 0 - #endif - const int fd = mi_unix_mmap_fd(); - int flags = MAP_PRIVATE | MAP_ANONYMOUS; - if (_mi_os_has_overcommit()) { - flags |= MAP_NORESERVE; - } - #if defined(PROT_MAX) - protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD - #endif - // huge page allocation - if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { - static _Atomic(size_t) large_page_try_ok; // = 0; - size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); - if (!large_only && try_ok > 0) { - // If the OS is not configured for large OS pages, or the user does not have - // enough permission, the `mmap` will always fail (but it might also fail for other reasons). 
- // Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times - // to avoid too many failing calls to mmap. - mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); - } - else { - int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux - int lfd = fd; - #ifdef MAP_ALIGNED_SUPER - lflags |= MAP_ALIGNED_SUPER; - #endif - #ifdef MAP_HUGETLB - lflags |= MAP_HUGETLB; - #endif - #ifdef MAP_HUGE_1GB - static bool mi_huge_pages_available = true; - if ((size % MI_GiB) == 0 && mi_huge_pages_available) { - lflags |= MAP_HUGE_1GB; - } - else - #endif - { - #ifdef MAP_HUGE_2MB - lflags |= MAP_HUGE_2MB; - #endif - } - #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB - lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; - #endif - if (large_only || lflags != flags) { - // try large OS page allocation - *is_large = true; - p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); - #ifdef MAP_HUGE_1GB - if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { - mi_huge_pages_available = false; // don't try huge 1GiB pages again - _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); - lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); - p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); - } - #endif - if (large_only) return p; - if (p == NULL) { - mi_atomic_store_release(&large_page_try_ok, (size_t)8); // on error, don't try again for the next N allocations - } - } - } - } - // regular allocation - if (p == NULL) { - *is_large = false; - p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); - if (p != NULL) { - #if defined(MADV_HUGEPAGE) - // Many Linux systems don't allow MAP_HUGETLB but they support instead - // transparent huge pages (THP). Generally, it is not required to call `madvise` with MADV_HUGE - // though since properly aligned allocations will already use large pages if available - // in that case -- in particular for our large regions (in `memory.c`). - // However, some systems only allow THP if called with explicit `madvise`, so - // when large OS pages are enabled for mimalloc, we call `madvise` anyways. - if (allow_large && use_large_os_page(size, try_alignment)) { - if (mi_madvise(p, size, MADV_HUGEPAGE) == 0) { - *is_large = true; // possibly - }; - } - #elif defined(__sun) - if (allow_large && use_large_os_page(size, try_alignment)) { - struct memcntl_mha cmd = {0}; - cmd.mha_pagesize = large_os_page_size; - cmd.mha_cmd = MHA_MAPSIZE_VA; - if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) { - *is_large = true; - } - } - #endif - } - } - if (p == NULL) { - _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: %i, address: %p, large only: %d, allow large: %d)\n", size, errno, addr, large_only, allow_large); - } - return p; -} -#endif - - -/* ----------------------------------------------------------- - Primitive allocation from the OS. --------------------------------------------------------------- */ - -// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. 
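The note above states the key contract of the raw allocators in this file: an address or alignment hint may be ignored by the OS, so callers must verify the result and fall back. A minimal POSIX sketch of that check follows (illustrative only; `alloc_with_hint` is an invented name, and the 4 TiB hint assumes a 64-bit address space).

```
/* mmap's addr argument is only a hint unless MAP_FIXED is used, so the result
   must be checked for the required alignment and released on mismatch. */
#define _DEFAULT_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>

static void* alloc_with_hint(void* hint, size_t size, size_t alignment) {
  void* p = mmap(hint, size, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) return NULL;
  if (((uintptr_t)p % alignment) != 0) {
    munmap(p, size);   /* hint not honored: caller falls back (e.g. over-allocate) */
    return NULL;
  }
  return p;
}

int main(void) {
  const size_t size = (size_t)4 << 20;                      /* 4 MiB, 4 MiB aligned */
  void* p = alloc_with_hint((void*)((uintptr_t)4 << 40), size, size);
  printf("hinted allocation: %p\n", p);
  if (p != NULL) munmap(p, size);
  return 0;
}
```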
-static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) { - mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); - if (size == 0) return NULL; - if (!commit) allow_large = false; - if (try_alignment == 0) try_alignment = 1; // avoid 0 to ensure there will be no divide by zero when aligning - - void* p = NULL; - /* - if (commit && allow_large) { - p = _mi_os_try_alloc_from_huge_reserved(size, try_alignment); - if (p != NULL) { - *is_large = true; - return p; - } - } - */ - - #if defined(_WIN32) - int flags = MEM_RESERVE; - if (commit) { flags |= MEM_COMMIT; } - p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); - #elif defined(MI_USE_SBRK) || defined(__wasi__) - MI_UNUSED(allow_large); - *is_large = false; - p = mi_heap_grow(size, try_alignment); - #else - int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); - p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); - #endif - mi_stat_counter_increase(stats->mmap_calls, 1); - if (p != NULL) { - _mi_stat_increase(&stats->reserved, size); - if (commit) { _mi_stat_increase(&stats->committed, size); } - } - return p; -} - - -// Primitive aligned allocation from the OS. -// This function guarantees the allocated memory is aligned. -static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) { - mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0)); - mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); - mi_assert_internal(is_large != NULL); - if (!commit) allow_large = false; - if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL; - size = _mi_align_up(size, _mi_os_page_size()); - - // try first with a hint (this will be aligned directly on Win 10+ or BSD) - void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, stats); - if (p == NULL) return NULL; - - // if not aligned, free it, overallocate, and unmap around it - if (((uintptr_t)p % alignment != 0)) { - mi_os_mem_free(p, size, commit, stats); - _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (%zu bytes, address: %p, alignment: %zu, commit: %d)\n", size, p, alignment, commit); - if (size >= (SIZE_MAX - alignment)) return NULL; // overflow - const size_t over_size = size + alignment; - -#if _WIN32 - // over-allocate uncommitted (virtual) memory - p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, stats); - if (p == NULL) return NULL; - - // set p to the aligned part in the full region - // note: this is dangerous on Windows as VirtualFree needs the actual region pointer - // but in mi_os_mem_free we handle this (hopefully exceptional) situation. - p = mi_align_up_ptr(p, alignment); - - // explicitly commit only the aligned part - if (commit) { - _mi_os_commit(p, size, NULL, stats); - } -#else - // overallocate... - p = mi_os_mem_alloc(over_size, 1, commit, false, is_large, stats); - if (p == NULL) return NULL; - // and selectively unmap parts around the over-allocated area. 
(noop on sbrk) - void* aligned_p = mi_align_up_ptr(p, alignment); - size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p; - size_t mid_size = _mi_align_up(size, _mi_os_page_size()); - size_t post_size = over_size - pre_size - mid_size; - mi_assert_internal(pre_size < over_size && post_size < over_size && mid_size >= size); - if (pre_size > 0) mi_os_mem_free(p, pre_size, commit, stats); - if (post_size > 0) mi_os_mem_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); - // we can return the aligned pointer on `mmap` (and sbrk) systems - p = aligned_p; -#endif - } - - mi_assert_internal(p == NULL || (p != NULL && ((uintptr_t)p % alignment) == 0)); - return p; -} - - -/* ----------------------------------------------------------- - OS API: alloc, free, alloc_aligned ------------------------------------------------------------ */ - -void* _mi_os_alloc(size_t size, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - if (size == 0) return NULL; - size = _mi_os_good_alloc_size(size); - bool is_large = false; - return mi_os_mem_alloc(size, 0, true, false, &is_large, stats); -} - -void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - if (size == 0 || p == NULL) return; - size = _mi_os_good_alloc_size(size); - mi_os_mem_free(p, size, was_committed, stats); -} - -void _mi_os_free(void* p, size_t size, mi_stats_t* stats) { - _mi_os_free_ex(p, size, true, stats); -} - -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* tld_stats) -{ - MI_UNUSED(&mi_os_get_aligned_hint); // suppress unused warnings - MI_UNUSED(tld_stats); - if (size == 0) return NULL; - size = _mi_os_good_alloc_size(size); - alignment = _mi_align_up(alignment, _mi_os_page_size()); - bool allow_large = false; - if (large != NULL) { - allow_large = *large; - *large = false; - } - return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), &_mi_stats_main /*tld->stats*/ ); -} - -/* ----------------------------------------------------------- - OS aligned allocation with an offset. This is used - for large alignments > MI_ALIGNMENT_MAX. We use a large mimalloc - page where the object can be aligned at an offset from the start of the segment. - As we may need to overallocate, we need to free such pointers using `mi_free_aligned` - to use the actual start of the memory region. 
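The non-Windows fallback quoted above over-allocates by `alignment`, rounds the pointer up, and returns the slack before and after the aligned region to the OS. A simplified standalone sketch of that trim with plain `mmap`/`munmap` (no commit bookkeeping, no stats); it assumes `size` and `alignment` are multiples of the page size and `alignment` is a power of two, so the pre/post slack stays page-aligned.

```
#include <stdint.h>
#include <sys/mman.h>

// Allocate `size` bytes aligned to `alignment` by over-allocating and
// unmapping the slack around the aligned region. Free with munmap(p, size).
static void* alloc_aligned_trim(size_t size, size_t alignment) {
  const size_t over = size + alignment;
  void* raw = mmap(NULL, over, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (raw == MAP_FAILED) return NULL;

  uint8_t* base    = (uint8_t*)raw;
  uint8_t* aligned = (uint8_t*)(((uintptr_t)base + alignment - 1) & ~(uintptr_t)(alignment - 1));
  const size_t pre  = (size_t)(aligned - base);  // slack before the aligned block
  const size_t post = over - pre - size;         // slack after it

  if (pre  > 0) munmap(base, pre);               // unmapping a sub-range splits the mapping
  if (post > 0) munmap(aligned + size, post);
  return aligned;
}
```

On Windows this trick does not work because `VirtualFree` must be given the base address of the original reservation, which is why the quoted code keeps the whole over-allocation there and only commits the aligned part.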
------------------------------------------------------------ */ - -void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, bool commit, bool* large, mi_stats_t* tld_stats) { - mi_assert(offset <= MI_SEGMENT_SIZE); - mi_assert(offset <= size); - mi_assert((alignment % _mi_os_page_size()) == 0); - if (offset > MI_SEGMENT_SIZE) return NULL; - if (offset == 0) { - // regular aligned allocation - return _mi_os_alloc_aligned(size, alignment, commit, large, tld_stats); - } - else { - // overallocate to align at an offset - const size_t extra = _mi_align_up(offset, alignment) - offset; - const size_t oversize = size + extra; - void* start = _mi_os_alloc_aligned(oversize, alignment, commit, large, tld_stats); - if (start == NULL) return NULL; - void* p = (uint8_t*)start + extra; - mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment)); - // decommit the overallocation at the start - if (commit && extra > _mi_os_page_size()) { - _mi_os_decommit(start, extra, tld_stats); - } - return p; - } -} - -void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats) { - mi_assert(align_offset <= MI_SEGMENT_SIZE); - const size_t extra = _mi_align_up(align_offset, alignment) - align_offset; - void* start = (uint8_t*)p - extra; - _mi_os_free_ex(start, size + extra, was_committed, tld_stats); -} - -/* ----------------------------------------------------------- - OS memory API: reset, commit, decommit, protect, unprotect. ------------------------------------------------------------ */ - - -// OS page align within a given area, either conservative (pages inside the area only), -// or not (straddling pages outside the area is possible) -static void* mi_os_page_align_areax(bool conservative, void* addr, size_t size, size_t* newsize) { - mi_assert(addr != NULL && size > 0); - if (newsize != NULL) *newsize = 0; - if (size == 0 || addr == NULL) return NULL; - - // page align conservatively within the range - void* start = (conservative ? mi_align_up_ptr(addr, _mi_os_page_size()) - : mi_align_down_ptr(addr, _mi_os_page_size())); - void* end = (conservative ? mi_align_down_ptr((uint8_t*)addr + size, _mi_os_page_size()) - : mi_align_up_ptr((uint8_t*)addr + size, _mi_os_page_size())); - ptrdiff_t diff = (uint8_t*)end - (uint8_t*)start; - if (diff <= 0) return NULL; - - mi_assert_internal((conservative && (size_t)diff <= size) || (!conservative && (size_t)diff >= size)); - if (newsize != NULL) *newsize = (size_t)diff; - return start; -} - -static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* newsize) { - return mi_os_page_align_areax(true, addr, size, newsize); -} - -static void mi_mprotect_hint(int err) { -#if defined(MI_OS_USE_MMAP) && (MI_SECURE>=2) // guard page around every mimalloc page - if (err == ENOMEM) { - _mi_warning_message("the previous warning may have been caused by a low memory map limit.\n" - " On Linux this is controlled by the vm.max_map_count. For example:\n" - " > sudo sysctl -w vm.max_map_count=262144\n"); - } -#else - MI_UNUSED(err); -#endif -} - -// Commit/Decommit memory. -// Usually commit is aligned liberal, while decommit is aligned conservative. 
-// (but not for the reset version where we want commit to be conservative as well) -static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservative, bool* is_zero, mi_stats_t* stats) { - // page align in the range, commit liberally, decommit conservative - if (is_zero != NULL) { *is_zero = false; } - size_t csize; - void* start = mi_os_page_align_areax(conservative, addr, size, &csize); - if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr)) - int err = 0; - if (commit) { - _mi_stat_increase(&stats->committed, size); // use size for precise commit vs. decommit - _mi_stat_counter_increase(&stats->commit_calls, 1); - } - else { - _mi_stat_decrease(&stats->committed, size); - } - - #if defined(_WIN32) - if (commit) { - // *is_zero = true; // note: if the memory was already committed, the call succeeds but the memory is not zero'd - void* p = VirtualAlloc(start, csize, MEM_COMMIT, PAGE_READWRITE); - err = (p == start ? 0 : GetLastError()); - } - else { - BOOL ok = VirtualFree(start, csize, MEM_DECOMMIT); - err = (ok ? 0 : GetLastError()); - } - #elif defined(__wasi__) - // WebAssembly guests can't control memory protection - #elif 0 && defined(MAP_FIXED) && !defined(__APPLE__) - // Linux: disabled for now as mmap fixed seems much more expensive than MADV_DONTNEED (and splits VMA's?) - if (commit) { - // commit: just change the protection - err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } - } - else { - // decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss) - const int fd = mi_unix_mmap_fd(); - void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); - if (p != start) { err = errno; } - } - #else - // Linux, macOSX and others. - if (commit) { - // commit: ensure we can access the area - err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } - } - else { - #if defined(MADV_DONTNEED) && MI_DEBUG == 0 && MI_SECURE == 0 - // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) - // (on the other hand, MADV_FREE would be good enough.. it is just not reflected in the stats :-( ) - err = madvise(start, csize, MADV_DONTNEED); - #else - // decommit: just disable access (also used in debug and secure mode to trap on illegal access) - err = mprotect(start, csize, PROT_NONE); - if (err != 0) { err = errno; } - #endif - //#if defined(MADV_FREE_REUSE) - // while ((err = mi_madvise(start, csize, MADV_FREE_REUSE)) != 0 && errno == EAGAIN) { errno = 0; } - //#endif - } - #endif - if (err != 0) { - _mi_warning_message("%s error: start: %p, csize: 0x%zx, err: %i\n", commit ? 
"commit" : "decommit", start, csize, err); - mi_mprotect_hint(err); - } - mi_assert_internal(err == 0); - return (err == 0); -} - -bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - return mi_os_commitx(addr, size, true, false /* liberal */, is_zero, stats); -} - -bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - bool is_zero; - return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats); -} - -/* -static bool mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - return mi_os_commitx(addr, size, true, true // conservative - , is_zero, stats); -} -*/ - -// Signal to the OS that the address range is no longer in use -// but may be used later again. This will release physical memory -// pages and reduce swapping while keeping the memory committed. -// We page align to a conservative area inside the range to reset. -static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) { - // page align conservatively within the range - size_t csize; - void* start = mi_os_page_align_area_conservative(addr, size, &csize); - if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) - if (reset) _mi_stat_increase(&stats->reset, csize); - else _mi_stat_decrease(&stats->reset, csize); - if (!reset) return true; // nothing to do on unreset! - - #if (MI_DEBUG>1) && !MI_TRACK_ENABLED - if (MI_SECURE==0) { - memset(start, 0, csize); // pretend it is eagerly reset - } - #endif - -#if defined(_WIN32) - // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory - void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); - mi_assert_internal(p == start); - #if 1 - if (p == start && start != NULL) { - VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set - } - #endif - if (p != start) return false; -#else -#if defined(MADV_FREE) - static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); - int oadvice = (int)mi_atomic_load_relaxed(&advice); - int err; - while ((err = mi_madvise(start, csize, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; - if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) { - // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on - mi_atomic_store_release(&advice, (size_t)MADV_DONTNEED); - err = mi_madvise(start, csize, MADV_DONTNEED); - } -#elif defined(__wasi__) - int err = 0; -#else - int err = mi_madvise(start, csize, MADV_DONTNEED); -#endif - if (err != 0) { - _mi_warning_message("madvise reset error: start: %p, csize: 0x%zx, errno: %i\n", start, csize, errno); - } - //mi_assert(err == 0); - if (err != 0) return false; -#endif - return true; -} - -// Signal to the OS that the address range is no longer in use -// but may be used later again. This will release physical memory -// pages and reduce swapping while keeping the memory committed. -// We page align to a conservative area inside the range to reset. 
-bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - return mi_os_resetx(addr, size, true, stats); -} - -/* -bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - *is_zero = false; - return mi_os_resetx(addr, size, false, stats); -} -*/ - -// Protect a region in memory to be not accessible. -static bool mi_os_protectx(void* addr, size_t size, bool protect) { - // page align conservatively within the range - size_t csize = 0; - void* start = mi_os_page_align_area_conservative(addr, size, &csize); - if (csize == 0) return false; - /* - if (_mi_os_is_huge_reserved(addr)) { - _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); - } - */ - int err = 0; -#ifdef _WIN32 - DWORD oldprotect = 0; - BOOL ok = VirtualProtect(start, csize, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect); - err = (ok ? 0 : GetLastError()); -#elif defined(__wasi__) - err = 0; -#else - err = mprotect(start, csize, protect ? PROT_NONE : (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } -#endif - if (err != 0) { - _mi_warning_message("mprotect error: start: %p, csize: 0x%zx, err: %i\n", start, csize, err); - mi_mprotect_hint(err); - } - return (err == 0); -} - -bool _mi_os_protect(void* addr, size_t size) { - return mi_os_protectx(addr, size, true); -} - -bool _mi_os_unprotect(void* addr, size_t size) { - return mi_os_protectx(addr, size, false); -} - - - -bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { - // page align conservatively within the range - mi_assert_internal(oldsize > newsize && p != NULL); - if (oldsize < newsize || p == NULL) return false; - if (oldsize == newsize) return true; - - // oldsize and newsize should be page aligned or we cannot shrink precisely - void* addr = (uint8_t*)p + newsize; - size_t size = 0; - void* start = mi_os_page_align_area_conservative(addr, oldsize - newsize, &size); - if (size == 0 || start != addr) return false; - -#ifdef _WIN32 - // we cannot shrink on windows, but we can decommit - return _mi_os_decommit(start, size, stats); -#else - return mi_os_mem_free(start, size, true, stats); -#endif -} - - -/* ---------------------------------------------------------------------------- -Support for allocating huge OS pages (1Gib) that are reserved up-front -and possibly associated with a specific NUMA node. 
(use `numa_node>=0`) ------------------------------------------------------------------------------*/ -#define MI_HUGE_OS_PAGE_SIZE (MI_GiB) - -#if defined(_WIN32) && (MI_INTPTR_SIZE >= 8) -static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) -{ - mi_assert_internal(size%MI_GiB == 0); - mi_assert_internal(addr != NULL); - const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; - - mi_win_enable_large_os_pages(); - - MI_MEM_EXTENDED_PARAMETER params[3] = { {{0,0},{0}},{{0,0},{0}},{{0,0},{0}} }; - // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages - static bool mi_huge_pages_available = true; - if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { - params[0].Type.Type = MiMemExtendedParameterAttributeFlags; - params[0].Arg.ULong64 = MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; - ULONG param_count = 1; - if (numa_node >= 0) { - param_count++; - params[1].Type.Type = MiMemExtendedParameterNumaNode; - params[1].Arg.ULong = (unsigned)numa_node; - } - SIZE_T psize = size; - void* base = addr; - NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); - if (err == 0 && base != NULL) { - return base; - } - else { - // fall back to regular large pages - mi_huge_pages_available = false; // don't try further huge pages - _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err); - } - } - // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation - if (pVirtualAlloc2 != NULL && numa_node >= 0) { - params[0].Type.Type = MiMemExtendedParameterNumaNode; - params[0].Arg.ULong = (unsigned)numa_node; - return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); - } - - // otherwise use regular virtual alloc on older windows - return VirtualAlloc(addr, size, flags, PAGE_READWRITE); -} - -#elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) && !defined(__HAIKU__) -#include -#ifndef MPOL_PREFERRED -#define MPOL_PREFERRED 1 -#endif -#if defined(SYS_mbind) -static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { - return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags); -} -#else -static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { - MI_UNUSED(start); MI_UNUSED(len); MI_UNUSED(mode); MI_UNUSED(nmask); MI_UNUSED(maxnode); MI_UNUSED(flags); - return 0; -} -#endif -static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size%MI_GiB == 0); - bool is_large = true; - void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); - if (p == NULL) return NULL; - if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes - unsigned long numa_mask = (1UL << numa_node); - // TODO: does `mbind` work correctly for huge OS pages? should we - // use `set_mempolicy` before calling mmap instead? 
- // see: - long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); - if (err != 0) { - _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d: %s\n", numa_node, strerror(errno)); - } - } - return p; -} -#else -static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(numa_node); - return NULL; -} -#endif - -#if (MI_INTPTR_SIZE >= 8) -// To ensure proper alignment, use our own area for huge OS pages -static mi_decl_cache_align _Atomic(uintptr_t) mi_huge_start; // = 0 - -// Claim an aligned address range for huge pages -static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { - if (total_size != NULL) *total_size = 0; - const size_t size = pages * MI_HUGE_OS_PAGE_SIZE; - - uintptr_t start = 0; - uintptr_t end = 0; - uintptr_t huge_start = mi_atomic_load_relaxed(&mi_huge_start); - do { - start = huge_start; - if (start == 0) { - // Initialize the start address after the 32TiB area - start = ((uintptr_t)32 << 40); // 32TiB virtual start address -#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode - uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); - start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF)); // (randomly 12bits)*1GiB == between 0 to 4TiB -#endif - } - end = start + size; - mi_assert_internal(end % MI_SEGMENT_SIZE == 0); - } while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end)); - - if (total_size != NULL) *total_size = size; - return (uint8_t*)start; -} -#else -static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { - MI_UNUSED(pages); - if (total_size != NULL) *total_size = 0; - return NULL; -} -#endif - -// Allocate MI_SEGMENT_SIZE aligned huge pages -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize) { - if (psize != NULL) *psize = 0; - if (pages_reserved != NULL) *pages_reserved = 0; - size_t size = 0; - uint8_t* start = mi_os_claim_huge_pages(pages, &size); - if (start == NULL) return NULL; // or 32-bit systems - - // Allocate one page at the time but try to place them contiguously - // We allocate one page at the time to be able to abort if it takes too long - // or to at least allocate as many as available on the system. - mi_msecs_t start_t = _mi_clock_start(); - size_t page; - for (page = 0; page < pages; page++) { - // allocate a page - void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); - void* p = mi_os_alloc_huge_os_pagesx(addr, MI_HUGE_OS_PAGE_SIZE, numa_node); - - // Did we succeed at a contiguous address? 
- if (p != addr) { - // no success, issue a warning and break - if (p != NULL) { - _mi_warning_message("could not allocate contiguous huge page %zu at %p\n", page, addr); - _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); - } - break; - } - - // success, record it - _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); - _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); - - // check for timeout - if (max_msecs > 0) { - mi_msecs_t elapsed = _mi_clock_end(start_t); - if (page >= 1) { - mi_msecs_t estimate = ((elapsed / (page+1)) * pages); - if (estimate > 2*max_msecs) { // seems like we are going to timeout, break - elapsed = max_msecs + 1; - } - } - if (elapsed > max_msecs) { - _mi_warning_message("huge page allocation timed out\n"); - break; - } - } - } - mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); - if (pages_reserved != NULL) { *pages_reserved = page; } - if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; } - return (page == 0 ? NULL : start); -} - -// free every huge page in a range individually (as we allocated per page) -// note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. -void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { - if (p==NULL || size==0) return; - uint8_t* base = (uint8_t*)p; - while (size >= MI_HUGE_OS_PAGE_SIZE) { - _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats); - size -= MI_HUGE_OS_PAGE_SIZE; - base += MI_HUGE_OS_PAGE_SIZE; - } -} - -/* ---------------------------------------------------------------------------- -Support NUMA aware allocation ------------------------------------------------------------------------------*/ -#ifdef _WIN32 -static size_t mi_os_numa_nodex(void) { - USHORT numa_node = 0; - if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) { - // Extended API is supported - MI_PROCESSOR_NUMBER pnum; - (*pGetCurrentProcessorNumberEx)(&pnum); - USHORT nnode = 0; - BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode); - if (ok) { numa_node = nnode; } - } - else if (pGetNumaProcessorNode != NULL) { - // Vista or earlier, use older API that is limited to 64 processors. Issue #277 - DWORD pnum = GetCurrentProcessorNumber(); - UCHAR nnode = 0; - BOOL ok = pGetNumaProcessorNode((UCHAR)pnum, &nnode); - if (ok) { numa_node = nnode; } - } - return numa_node; -} - -static size_t mi_os_numa_node_countx(void) { - ULONG numa_max = 0; - GetNumaHighestNodeNumber(&numa_max); - // find the highest node number that has actual processors assigned to it. Issue #282 - while(numa_max > 0) { - if (pGetNumaNodeProcessorMaskEx != NULL) { - // Extended API is supported - GROUP_AFFINITY affinity; - if ((*pGetNumaNodeProcessorMaskEx)((USHORT)numa_max, &affinity)) { - if (affinity.Mask != 0) break; // found the maximum non-empty node - } - } - else { - // Vista or earlier, use older API that is limited to 64 processors. 
- ULONGLONG mask; - if (GetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) { - if (mask != 0) break; // found the maximum non-empty node - }; - } - // max node was invalid or had no processor assigned, try again - numa_max--; - } - return ((size_t)numa_max + 1); -} -#elif defined(__linux__) -#include // getcpu -#include // access - -static size_t mi_os_numa_nodex(void) { -#ifdef SYS_getcpu - unsigned long node = 0; - unsigned long ncpu = 0; - long err = syscall(SYS_getcpu, &ncpu, &node, NULL); - if (err != 0) return 0; - return node; -#else - return 0; -#endif -} -static size_t mi_os_numa_node_countx(void) { - char buf[128]; - unsigned node = 0; - for(node = 0; node < 256; node++) { - // enumerate node entries -- todo: it there a more efficient way to do this? (but ensure there is no allocation) - snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); - if (access(buf,R_OK) != 0) break; - } - return (node+1); -} -#elif defined(__FreeBSD__) && __FreeBSD_version >= 1200000 -static size_t mi_os_numa_nodex(void) { - domainset_t dom; - size_t node; - int policy; - if (cpuset_getdomain(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, sizeof(dom), &dom, &policy) == -1) return 0ul; - for (node = 0; node < MAXMEMDOM; node++) { - if (DOMAINSET_ISSET(node, &dom)) return node; - } - return 0ul; -} -static size_t mi_os_numa_node_countx(void) { - size_t ndomains = 0; - size_t len = sizeof(ndomains); - if (sysctlbyname("vm.ndomains", &ndomains, &len, NULL, 0) == -1) return 0ul; - return ndomains; -} -#elif defined(__DragonFly__) -static size_t mi_os_numa_nodex(void) { - // TODO: DragonFly does not seem to provide any userland means to get this information. - return 0ul; -} -static size_t mi_os_numa_node_countx(void) { - size_t ncpus = 0, nvirtcoresperphys = 0; - size_t len = sizeof(size_t); - if (sysctlbyname("hw.ncpu", &ncpus, &len, NULL, 0) == -1) return 0ul; - if (sysctlbyname("hw.cpu_topology_ht_ids", &nvirtcoresperphys, &len, NULL, 0) == -1) return 0ul; - return nvirtcoresperphys * ncpus; -} -#else -static size_t mi_os_numa_nodex(void) { - return 0; -} -static size_t mi_os_numa_node_countx(void) { - return 1; -} -#endif - -_Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count - -size_t _mi_os_numa_node_count_get(void) { - size_t count = mi_atomic_load_acquire(&_mi_numa_node_count); - if (count <= 0) { - long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly? 
- if (ncount > 0) { - count = (size_t)ncount; - } - else { - count = mi_os_numa_node_countx(); // or detect dynamically - if (count == 0) count = 1; - } - mi_atomic_store_release(&_mi_numa_node_count, count); // save it - _mi_verbose_message("using %zd numa regions\n", count); - } - return count; -} - -int _mi_os_numa_node_get(mi_os_tld_t* tld) { - MI_UNUSED(tld); - size_t numa_count = _mi_os_numa_node_count(); - if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 - // never more than the node count and >= 0 - size_t numa_node = mi_os_numa_nodex(); - if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } - return (int)numa_node; -} diff --git a/3rd/mimalloc-2.0.9/src/page-queue.c b/3rd/mimalloc-2.0.9/src/page-queue.c deleted file mode 100644 index cb54b374..00000000 --- a/3rd/mimalloc-2.0.9/src/page-queue.c +++ /dev/null @@ -1,332 +0,0 @@ -/*---------------------------------------------------------------------------- -Copyright (c) 2018-2020, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -/* ----------------------------------------------------------- - Definition of page queues for each block size ------------------------------------------------------------ */ - -#ifndef MI_IN_PAGE_C -#error "this file should be included from 'page.c'" -#endif - -/* ----------------------------------------------------------- - Minimal alignment in machine words (i.e. `sizeof(void*)`) ------------------------------------------------------------ */ - -#if (MI_MAX_ALIGN_SIZE > 4*MI_INTPTR_SIZE) - #error "define alignment for more than 4x word size for this platform" -#elif (MI_MAX_ALIGN_SIZE > 2*MI_INTPTR_SIZE) - #define MI_ALIGN4W // 4 machine words minimal alignment -#elif (MI_MAX_ALIGN_SIZE > MI_INTPTR_SIZE) - #define MI_ALIGN2W // 2 machine words minimal alignment -#else - // ok, default alignment is 1 word -#endif - - -/* ----------------------------------------------------------- - Queue query ------------------------------------------------------------ */ - - -static inline bool mi_page_queue_is_huge(const mi_page_queue_t* pq) { - return (pq->block_size == (MI_MEDIUM_OBJ_SIZE_MAX+sizeof(uintptr_t))); -} - -static inline bool mi_page_queue_is_full(const mi_page_queue_t* pq) { - return (pq->block_size == (MI_MEDIUM_OBJ_SIZE_MAX+(2*sizeof(uintptr_t)))); -} - -static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) { - return (pq->block_size > MI_MEDIUM_OBJ_SIZE_MAX); -} - -/* ----------------------------------------------------------- - Bins ------------------------------------------------------------ */ - -// Return the bin for a given field size. -// Returns MI_BIN_HUGE if the size is too large. -// We use `wsize` for the size in "machine word sizes", -// i.e. byte size == `wsize*sizeof(void*)`. 
-static inline uint8_t mi_bin(size_t size) { - size_t wsize = _mi_wsize_from_size(size); - uint8_t bin; - if (wsize <= 1) { - bin = 1; - } - #if defined(MI_ALIGN4W) - else if (wsize <= 4) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #elif defined(MI_ALIGN2W) - else if (wsize <= 8) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #else - else if (wsize <= 8) { - bin = (uint8_t)wsize; - } - #endif - else if (wsize > MI_MEDIUM_OBJ_WSIZE_MAX) { - bin = MI_BIN_HUGE; - } - else { - #if defined(MI_ALIGN4W) - if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes - #endif - wsize--; - // find the highest bit - uint8_t b = (uint8_t)mi_bsr(wsize); // note: wsize != 0 - // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). - // - adjust with 3 because we use do not round the first 8 sizes - // which each get an exact bin - bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; - mi_assert_internal(bin < MI_BIN_HUGE); - } - mi_assert_internal(bin > 0 && bin <= MI_BIN_HUGE); - return bin; -} - - - -/* ----------------------------------------------------------- - Queue of pages with free blocks ------------------------------------------------------------ */ - -uint8_t _mi_bin(size_t size) { - return mi_bin(size); -} - -size_t _mi_bin_size(uint8_t bin) { - return _mi_heap_empty.pages[bin].block_size; -} - -// Good size for allocation -size_t mi_good_size(size_t size) mi_attr_noexcept { - if (size <= MI_MEDIUM_OBJ_SIZE_MAX) { - return _mi_bin_size(mi_bin(size)); - } - else { - return _mi_align_up(size,_mi_os_page_size()); - } -} - -#if (MI_DEBUG>1) -static bool mi_page_queue_contains(mi_page_queue_t* queue, const mi_page_t* page) { - mi_assert_internal(page != NULL); - mi_page_t* list = queue->first; - while (list != NULL) { - mi_assert_internal(list->next == NULL || list->next->prev == list); - mi_assert_internal(list->prev == NULL || list->prev->next == list); - if (list == page) break; - list = list->next; - } - return (list == page); -} - -#endif - -#if (MI_DEBUG>1) -static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t* pq) { - return (pq >= &heap->pages[0] && pq <= &heap->pages[MI_BIN_FULL]); -} -#endif - -static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) { - uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : mi_bin(page->xblock_size)); - mi_heap_t* heap = mi_page_heap(page); - mi_assert_internal(heap != NULL && bin <= MI_BIN_FULL); - mi_page_queue_t* pq = &heap->pages[bin]; - mi_assert_internal(bin >= MI_BIN_HUGE || page->xblock_size == pq->block_size); - mi_assert_expensive(mi_page_queue_contains(pq, page)); - return pq; -} - -static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) { - uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : mi_bin(page->xblock_size)); - mi_assert_internal(bin <= MI_BIN_FULL); - mi_page_queue_t* pq = &heap->pages[bin]; - mi_assert_internal(mi_page_is_in_full(page) || page->xblock_size == pq->block_size); - return pq; -} - -// The current small page array is for efficiency and for each -// small size (up to 256) it points directly to the page for that -// size without having to compute the bin. This means when the -// current free page queue is updated for a small bin, we need to update a -// range of entries in `_mi_page_small_free`. 
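`mi_bin` above maps a block size to a queue index: requests up to 8 machine words get their own bin, and larger requests are keyed on the highest set bit of the word count plus the next two bits, giving four bins per power of two. A standalone sketch of that mapping, ignoring the `MI_ALIGN2W`/`MI_ALIGN4W` rounding and the huge-size cutoff, and using GCC/Clang's `__builtin_clzll` in place of `mi_bsr`.

```
#include <stdint.h>
#include <stdio.h>

// Size class for a request of `wsize` machine words (wsize >= 1).
static unsigned size_class(size_t wsize) {
  if (wsize <= 1) return 1;
  if (wsize <= 8) return (unsigned)wsize;                // one class per word count
  wsize--;                                               // so an exact power of two falls in the class below
  unsigned b = 63u - (unsigned)__builtin_clzll(wsize);   // index of the highest set bit
  return ((b << 2) + (unsigned)((wsize >> (b - 2)) & 0x03)) - 3;  // top three bits select the class
}

int main(void) {
  for (size_t w = 1; w <= 1024; w *= 2) {
    printf("wsize %4zu -> class %u\n", w, size_class(w));
  }
  return 0;
}
```

With four classes per power of two, the spacing between consecutive class sizes is one eighth of the largest size in each power-of-two range, which is where the quoted ~12.5% worst-case internal fragmentation figure comes from.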
-static inline void mi_heap_queue_first_update(mi_heap_t* heap, const mi_page_queue_t* pq) { - mi_assert_internal(mi_heap_contains_queue(heap,pq)); - size_t size = pq->block_size; - if (size > MI_SMALL_SIZE_MAX) return; - - mi_page_t* page = pq->first; - if (pq->first == NULL) page = (mi_page_t*)&_mi_page_empty; - - // find index in the right direct page array - size_t start; - size_t idx = _mi_wsize_from_size(size); - mi_page_t** pages_free = heap->pages_free_direct; - - if (pages_free[idx] == page) return; // already set - - // find start slot - if (idx<=1) { - start = 0; - } - else { - // find previous size; due to minimal alignment upto 3 previous bins may need to be skipped - uint8_t bin = mi_bin(size); - const mi_page_queue_t* prev = pq - 1; - while( bin == mi_bin(prev->block_size) && prev > &heap->pages[0]) { - prev--; - } - start = 1 + _mi_wsize_from_size(prev->block_size); - if (start > idx) start = idx; - } - - // set size range to the right page - mi_assert(start <= idx); - for (size_t sz = start; sz <= idx; sz++) { - pages_free[sz] = page; - } -} - -/* -static bool mi_page_queue_is_empty(mi_page_queue_t* queue) { - return (queue->first == NULL); -} -*/ - -static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { - mi_assert_internal(page != NULL); - mi_assert_expensive(mi_page_queue_contains(queue, page)); - mi_assert_internal(page->xblock_size == queue->block_size || (page->xblock_size > MI_MEDIUM_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); - mi_heap_t* heap = mi_page_heap(page); - - if (page->prev != NULL) page->prev->next = page->next; - if (page->next != NULL) page->next->prev = page->prev; - if (page == queue->last) queue->last = page->prev; - if (page == queue->first) { - queue->first = page->next; - // update first - mi_assert_internal(mi_heap_contains_queue(heap, queue)); - mi_heap_queue_first_update(heap,queue); - } - heap->page_count--; - page->next = NULL; - page->prev = NULL; - // mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), NULL); - mi_page_set_in_full(page,false); -} - - -static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) { - mi_assert_internal(mi_page_heap(page) == heap); - mi_assert_internal(!mi_page_queue_contains(queue, page)); - #if MI_HUGE_PAGE_ABANDON - mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE); - #endif - mi_assert_internal(page->xblock_size == queue->block_size || - (page->xblock_size > MI_MEDIUM_OBJ_SIZE_MAX) || - (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); - - mi_page_set_in_full(page, mi_page_queue_is_full(queue)); - // mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), heap); - page->next = queue->first; - page->prev = NULL; - if (queue->first != NULL) { - mi_assert_internal(queue->first->prev == NULL); - queue->first->prev = page; - queue->first = page; - } - else { - queue->first = queue->last = page; - } - - // update direct - mi_heap_queue_first_update(heap, queue); - heap->page_count++; -} - - -static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) { - mi_assert_internal(page != NULL); - mi_assert_expensive(mi_page_queue_contains(from, page)); - mi_assert_expensive(!mi_page_queue_contains(to, page)); - - mi_assert_internal((page->xblock_size == to->block_size && page->xblock_size == from->block_size) || - (page->xblock_size == to->block_size && mi_page_queue_is_full(from)) || - (page->xblock_size == 
from->block_size && mi_page_queue_is_full(to)) || - (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) || - (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to))); - - mi_heap_t* heap = mi_page_heap(page); - if (page->prev != NULL) page->prev->next = page->next; - if (page->next != NULL) page->next->prev = page->prev; - if (page == from->last) from->last = page->prev; - if (page == from->first) { - from->first = page->next; - // update first - mi_assert_internal(mi_heap_contains_queue(heap, from)); - mi_heap_queue_first_update(heap, from); - } - - page->prev = to->last; - page->next = NULL; - if (to->last != NULL) { - mi_assert_internal(heap == mi_page_heap(to->last)); - to->last->next = page; - to->last = page; - } - else { - to->first = page; - to->last = page; - mi_heap_queue_first_update(heap, to); - } - - mi_page_set_in_full(page, mi_page_queue_is_full(to)); -} - -// Only called from `mi_heap_absorb`. -size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append) { - mi_assert_internal(mi_heap_contains_queue(heap,pq)); - mi_assert_internal(pq->block_size == append->block_size); - - if (append->first==NULL) return 0; - - // set append pages to new heap and count - size_t count = 0; - for (mi_page_t* page = append->first; page != NULL; page = page->next) { - // inline `mi_page_set_heap` to avoid wrong assertion during absorption; - // in this case it is ok to be delayed freeing since both "to" and "from" heap are still alive. - mi_atomic_store_release(&page->xheap, (uintptr_t)heap); - // set the flag to delayed free (not overriding NEVER_DELAYED_FREE) which has as a - // side effect that it spins until any DELAYED_FREEING is finished. This ensures - // that after appending only the new heap will be used for delayed free operations. - _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false); - count++; - } - - if (pq->last==NULL) { - // take over afresh - mi_assert_internal(pq->first==NULL); - pq->first = append->first; - pq->last = append->last; - mi_heap_queue_first_update(heap, pq); - } - else { - // append to end - mi_assert_internal(pq->last!=NULL); - mi_assert_internal(append->first!=NULL); - pq->last->next = append->first; - append->first->prev = pq->last; - pq->last = append->last; - } - return count; -} diff --git a/3rd/mimalloc-2.0.9/src/page.c b/3rd/mimalloc-2.0.9/src/page.c deleted file mode 100644 index 4250ff35..00000000 --- a/3rd/mimalloc-2.0.9/src/page.c +++ /dev/null @@ -1,926 +0,0 @@ -/*---------------------------------------------------------------------------- -Copyright (c) 2018-2020, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -/* ----------------------------------------------------------- - The core of the allocator. Every segment contains - pages of a certain block size. The main function - exported is `mi_malloc_generic`. 
------------------------------------------------------------ */ - -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" - -/* ----------------------------------------------------------- - Definition of page queues for each block size ------------------------------------------------------------ */ - -#define MI_IN_PAGE_C -#include "page-queue.c" -#undef MI_IN_PAGE_C - - -/* ----------------------------------------------------------- - Page helpers ------------------------------------------------------------ */ - -// Index a block in a page -static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t block_size, size_t i) { - MI_UNUSED(page); - mi_assert_internal(page != NULL); - mi_assert_internal(i <= page->reserved); - return (mi_block_t*)((uint8_t*)page_start + (i * block_size)); -} - -static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld); -static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); - -#if (MI_DEBUG>=3) -static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { - size_t count = 0; - while (head != NULL) { - mi_assert_internal(page == _mi_ptr_page(head)); - count++; - head = mi_block_next(page, head); - } - return count; -} - -/* -// Start of the page available memory -static inline uint8_t* mi_page_area(const mi_page_t* page) { - return _mi_page_start(_mi_page_segment(page), page, NULL); -} -*/ - -static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) { - size_t psize; - uint8_t* page_area = _mi_page_start(_mi_page_segment(page), page, &psize); - mi_block_t* start = (mi_block_t*)page_area; - mi_block_t* end = (mi_block_t*)(page_area + psize); - while(p != NULL) { - if (p < start || p >= end) return false; - p = mi_block_next(page, p); - } - return true; -} - -static bool mi_page_is_valid_init(mi_page_t* page) { - mi_assert_internal(page->xblock_size > 0); - mi_assert_internal(page->used <= page->capacity); - mi_assert_internal(page->capacity <= page->reserved); - - mi_segment_t* segment = _mi_page_segment(page); - uint8_t* start = _mi_page_start(segment,page,NULL); - mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL)); - //const size_t bsize = mi_page_block_size(page); - //mi_assert_internal(start + page->capacity*page->block_size == page->top); - - mi_assert_internal(mi_page_list_is_valid(page,page->free)); - mi_assert_internal(mi_page_list_is_valid(page,page->local_free)); - - #if MI_DEBUG>3 // generally too expensive to check this - if (page->is_zero) { - const size_t ubsize = mi_page_usable_block_size(page); - for(mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) { - mi_assert_expensive(mi_mem_is_zero(block + 1, ubsize - sizeof(mi_block_t))); - } - } - #endif - - mi_block_t* tfree = mi_page_thread_free(page); - mi_assert_internal(mi_page_list_is_valid(page, tfree)); - //size_t tfree_count = mi_page_list_count(page, tfree); - //mi_assert_internal(tfree_count <= page->thread_freed + 1); - - size_t free_count = mi_page_list_count(page, page->free) + mi_page_list_count(page, page->local_free); - mi_assert_internal(page->used + free_count == page->capacity); - - return true; -} - -bool _mi_page_is_valid(mi_page_t* page) { - mi_assert_internal(mi_page_is_valid_init(page)); - #if MI_SECURE - mi_assert_internal(page->keys[0] != 0); - #endif - if (mi_page_heap(page)!=NULL) { - mi_segment_t* segment = _mi_page_segment(page); - - mi_assert_internal(!_mi_process_is_initialized || 
segment->thread_id==0 || segment->thread_id == mi_page_heap(page)->thread_id); - #if MI_HUGE_PAGE_ABANDON - if (segment->kind != MI_SEGMENT_HUGE) - #endif - { - mi_page_queue_t* pq = mi_page_queue_of(page); - mi_assert_internal(mi_page_queue_contains(pq, page)); - mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_MEDIUM_OBJ_SIZE_MAX || mi_page_is_in_full(page)); - mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq)); - } - } - return true; -} -#endif - -void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) { - while (!_mi_page_try_use_delayed_free(page, delay, override_never)) { - mi_atomic_yield(); - } -} - -bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) { - mi_thread_free_t tfreex; - mi_delayed_t old_delay; - mi_thread_free_t tfree; - size_t yield_count = 0; - do { - tfree = mi_atomic_load_acquire(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS; - tfreex = mi_tf_set_delayed(tfree, delay); - old_delay = mi_tf_delayed(tfree); - if mi_unlikely(old_delay == MI_DELAYED_FREEING) { - if (yield_count >= 4) return false; // give up after 4 tries - yield_count++; - mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. - // tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail - } - else if (delay == old_delay) { - break; // avoid atomic operation if already equal - } - else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) { - break; // leave never-delayed flag set - } - } while ((old_delay == MI_DELAYED_FREEING) || - !mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); - - return true; // success -} - -/* ----------------------------------------------------------- - Page collect the `local_free` and `thread_free` lists ------------------------------------------------------------ */ - -// Collect the local `thread_free` list using an atomic exchange. -// Note: The exchange must be done atomically as this is used right after -// moving to the full list in `mi_page_collect_ex` and we need to -// ensure that there was no race where the page became unfull just before the move. 
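The collection step described above takes ownership of the whole cross-thread free list in one atomic operation, after which the list can be walked without further synchronization. A standalone sketch of that pattern with a plain pointer head; the real `xthread_free` word also packs the delayed-free state in its low bits, which is why the quoted code rebuilds the word and uses a CAS loop instead of the simple exchange shown here.

```
#include <stdatomic.h>
#include <stddef.h>

typedef struct node_s { struct node_s* next; } node_t;

// Producers push nodes onto `head` concurrently (Treiber-stack style).
static void push(_Atomic(node_t*)* head, node_t* n) {
  n->next = atomic_load_explicit(head, memory_order_relaxed);
  while (!atomic_compare_exchange_weak_explicit(head, &n->next, n,
                                                memory_order_release,
                                                memory_order_relaxed)) {
    // n->next was refreshed with the current head; retry
  }
}

// The single consumer detaches the entire list at once and owns it afterwards.
static node_t* take_all(_Atomic(node_t*)* head) {
  return atomic_exchange_explicit(head, NULL, memory_order_acquire);
}
```

Because only the head pointer is ever contended, the consumer pays one atomic operation per collection regardless of how many blocks other threads freed in the meantime.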
-static void _mi_page_thread_free_collect(mi_page_t* page) -{ - mi_block_t* head; - mi_thread_free_t tfreex; - mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free); - do { - head = mi_tf_block(tfree); - tfreex = mi_tf_set_block(tfree,NULL); - } while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tfree, tfreex)); - - // return if the list is empty - if (head == NULL) return; - - // find the tail -- also to get a proper count (without data races) - uint32_t max_count = page->capacity; // cannot collect more than capacity - uint32_t count = 1; - mi_block_t* tail = head; - mi_block_t* next; - while ((next = mi_block_next(page,tail)) != NULL && count <= max_count) { - count++; - tail = next; - } - // if `count > max_count` there was a memory corruption (possibly infinite list due to double multi-threaded free) - if (count > max_count) { - _mi_error_message(EFAULT, "corrupted thread-free list\n"); - return; // the thread-free items cannot be freed - } - - // and append the current local free list - mi_block_set_next(page,tail, page->local_free); - page->local_free = head; - - // update counts now - page->used -= count; -} - -void _mi_page_free_collect(mi_page_t* page, bool force) { - mi_assert_internal(page!=NULL); - - // collect the thread free list - if (force || mi_page_thread_free(page) != NULL) { // quick test to avoid an atomic operation - _mi_page_thread_free_collect(page); - } - - // and the local free list - if (page->local_free != NULL) { - if mi_likely(page->free == NULL) { - // usual case - page->free = page->local_free; - page->local_free = NULL; - page->is_zero = false; - } - else if (force) { - // append -- only on shutdown (force) as this is a linear operation - mi_block_t* tail = page->local_free; - mi_block_t* next; - while ((next = mi_block_next(page, tail)) != NULL) { - tail = next; - } - mi_block_set_next(page, tail, page->free); - page->free = page->local_free; - page->local_free = NULL; - page->is_zero = false; - } - } - - mi_assert_internal(!force || page->local_free == NULL); -} - - - -/* ----------------------------------------------------------- - Page fresh and retire ------------------------------------------------------------ */ - -// called from segments when reclaiming abandoned pages -void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { - mi_assert_expensive(mi_page_is_valid_init(page)); - - mi_assert_internal(mi_page_heap(page) == heap); - mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE); - #if MI_HUGE_PAGE_ABANDON - mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE); - #endif - mi_assert_internal(!page->is_reset); - // TODO: push on full queue immediately if it is full? 
- mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); - mi_page_queue_push(heap, pq, page); - mi_assert_expensive(_mi_page_is_valid(page)); -} - -// allocate a fresh page from a segment -static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size, size_t page_alignment) { - #if !MI_HUGE_PAGE_ABANDON - mi_assert_internal(pq != NULL); - mi_assert_internal(mi_heap_contains_queue(heap, pq)); - mi_assert_internal(page_alignment > 0 || block_size > MI_MEDIUM_OBJ_SIZE_MAX || block_size == pq->block_size); - #endif - mi_page_t* page = _mi_segment_page_alloc(heap, block_size, page_alignment, &heap->tld->segments, &heap->tld->os); - if (page == NULL) { - // this may be out-of-memory, or an abandoned page was reclaimed (and in our queue) - return NULL; - } - mi_assert_internal(page_alignment >0 || block_size > MI_MEDIUM_OBJ_SIZE_MAX || _mi_page_segment(page)->kind != MI_SEGMENT_HUGE); - mi_assert_internal(pq!=NULL || page->xblock_size != 0); - mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size); - // a fresh page was found, initialize it - const size_t full_block_size = ((pq == NULL || mi_page_queue_is_huge(pq)) ? mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc - mi_assert_internal(full_block_size >= block_size); - mi_page_init(heap, page, full_block_size, heap->tld); - mi_heap_stat_increase(heap, pages, 1); - if (pq != NULL) { mi_page_queue_push(heap, pq, page); } - mi_assert_expensive(_mi_page_is_valid(page)); - return page; -} - -// Get a fresh page to use -static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { - mi_assert_internal(mi_heap_contains_queue(heap, pq)); - mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size, 0); - if (page==NULL) return NULL; - mi_assert_internal(pq->block_size==mi_page_block_size(page)); - mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page))); - return page; -} - -/* ----------------------------------------------------------- - Do any delayed frees - (put there by other threads if they deallocated in a full page) ------------------------------------------------------------ */ -void _mi_heap_delayed_free_all(mi_heap_t* heap) { - while (!_mi_heap_delayed_free_partial(heap)) { - mi_atomic_yield(); - } -} - -// returns true if all delayed frees were processed -bool _mi_heap_delayed_free_partial(mi_heap_t* heap) { - // take over the list (note: no atomic exchange since it is often NULL) - mi_block_t* block = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); - while (block != NULL && !mi_atomic_cas_ptr_weak_acq_rel(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { /* nothing */ }; - bool all_freed = true; - - // and free them all - while(block != NULL) { - mi_block_t* next = mi_block_nextx(heap,block, heap->keys); - // use internal free instead of regular one to keep stats etc correct - if (!_mi_free_delayed_block(block)) { - // we might already start delayed freeing while another thread has not yet - // reset the delayed_freeing flag; in that case delay it further by reinserting the current block - // into the delayed free list - all_freed = false; - mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); - do { - mi_block_set_nextx(heap, block, dfree, heap->keys); - } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block)); - } - block = next; - } - return all_freed; -} - -/* ----------------------------------------------------------- 
- Unfull, abandon, free and retire ------------------------------------------------------------ */ - -// Move a page from the full list back to a regular list -void _mi_page_unfull(mi_page_t* page) { - mi_assert_internal(page != NULL); - mi_assert_expensive(_mi_page_is_valid(page)); - mi_assert_internal(mi_page_is_in_full(page)); - if (!mi_page_is_in_full(page)) return; - - mi_heap_t* heap = mi_page_heap(page); - mi_page_queue_t* pqfull = &heap->pages[MI_BIN_FULL]; - mi_page_set_in_full(page, false); // to get the right queue - mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); - mi_page_set_in_full(page, true); - mi_page_queue_enqueue_from(pq, pqfull, page); -} - -static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) { - mi_assert_internal(pq == mi_page_queue_of(page)); - mi_assert_internal(!mi_page_immediate_available(page)); - mi_assert_internal(!mi_page_is_in_full(page)); - - if (mi_page_is_in_full(page)) return; - mi_page_queue_enqueue_from(&mi_page_heap(page)->pages[MI_BIN_FULL], pq, page); - _mi_page_free_collect(page,false); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set -} - - -// Abandon a page with used blocks at the end of a thread. -// Note: only call if it is ensured that no references exist from -// the `page->heap->thread_delayed_free` into this page. -// Currently only called through `mi_heap_collect_ex` which ensures this. -void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { - mi_assert_internal(page != NULL); - mi_assert_expensive(_mi_page_is_valid(page)); - mi_assert_internal(pq == mi_page_queue_of(page)); - mi_assert_internal(mi_page_heap(page) != NULL); - - mi_heap_t* pheap = mi_page_heap(page); - - // remove from our page list - mi_segments_tld_t* segments_tld = &pheap->tld->segments; - mi_page_queue_remove(pq, page); - - // page is no longer associated with our heap - mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); - mi_page_set_heap(page, NULL); - -#if MI_DEBUG>1 - // check there are no references left.. - for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->keys)) { - mi_assert_internal(_mi_ptr_page(block) != page); - } -#endif - - // and abandon it - mi_assert_internal(mi_page_heap(page) == NULL); - _mi_segment_page_abandon(page,segments_tld); -} - - -// Free a page with no more free blocks -void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { - mi_assert_internal(page != NULL); - mi_assert_expensive(_mi_page_is_valid(page)); - mi_assert_internal(pq == mi_page_queue_of(page)); - mi_assert_internal(mi_page_all_free(page)); - mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING); - - // no more aligned blocks in here - mi_page_set_has_aligned(page, false); - - mi_heap_t* heap = mi_page_heap(page); - - // remove from the page list - // (no need to do _mi_heap_delayed_free first as all blocks are already free) - mi_segments_tld_t* segments_tld = &heap->tld->segments; - mi_page_queue_remove(pq, page); - - // and free it - mi_page_set_heap(page,NULL); - _mi_segment_page_free(page, force, segments_tld); -} - -// Retire parameters -#define MI_MAX_RETIRE_SIZE (MI_MEDIUM_OBJ_SIZE_MAX) -#define MI_RETIRE_CYCLES (8) - -// Retire a page with no more used blocks -// Important to not retire too quickly though as new -// allocations might coming. 
-// Note: called from `mi_free` and benchmarks often -// trigger this due to freeing everything and then -// allocating again so careful when changing this. -void _mi_page_retire(mi_page_t* page) mi_attr_noexcept { - mi_assert_internal(page != NULL); - mi_assert_expensive(_mi_page_is_valid(page)); - mi_assert_internal(mi_page_all_free(page)); - - mi_page_set_has_aligned(page, false); - - // don't retire too often.. - // (or we end up retiring and re-allocating most of the time) - // NOTE: refine this more: we should not retire if this - // is the only page left with free blocks. It is not clear - // how to check this efficiently though... - // for now, we don't retire if it is the only page left of this size class. - mi_page_queue_t* pq = mi_page_queue_of(page); - if mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_queue_is_special(pq)) { // not too large && not full or huge queue? - if (pq->last==page && pq->first==page) { // the only page in the queue? - mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - page->retire_expire = 1 + (page->xblock_size <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); - mi_heap_t* heap = mi_page_heap(page); - mi_assert_internal(pq >= heap->pages); - const size_t index = pq - heap->pages; - mi_assert_internal(index < MI_BIN_FULL && index < MI_BIN_HUGE); - if (index < heap->page_retired_min) heap->page_retired_min = index; - if (index > heap->page_retired_max) heap->page_retired_max = index; - mi_assert_internal(mi_page_all_free(page)); - return; // dont't free after all - } - } - _mi_page_free(page, pq, false); -} - -// free retired pages: we don't need to look at the entire queues -// since we only retire pages that are at the head position in a queue. -void _mi_heap_collect_retired(mi_heap_t* heap, bool force) { - size_t min = MI_BIN_FULL; - size_t max = 0; - for(size_t bin = heap->page_retired_min; bin <= heap->page_retired_max; bin++) { - mi_page_queue_t* pq = &heap->pages[bin]; - mi_page_t* page = pq->first; - if (page != NULL && page->retire_expire != 0) { - if (mi_page_all_free(page)) { - page->retire_expire--; - if (force || page->retire_expire == 0) { - _mi_page_free(pq->first, pq, force); - } - else { - // keep retired, update min/max - if (bin < min) min = bin; - if (bin > max) max = bin; - } - } - else { - page->retire_expire = 0; - } - } - } - heap->page_retired_min = min; - heap->page_retired_max = max; -} - - -/* ----------------------------------------------------------- - Initialize the initial free list in a page. - In secure mode we initialize a randomized list by - alternating between slices. 
------------------------------------------------------------ */ - -#define MI_MAX_SLICE_SHIFT (6) // at most 64 slices -#define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT) -#define MI_MIN_SLICES (2) - -static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) { - MI_UNUSED(stats); - #if (MI_SECURE<=2) - mi_assert_internal(page->free == NULL); - mi_assert_internal(page->local_free == NULL); - #endif - mi_assert_internal(page->capacity + extend <= page->reserved); - mi_assert_internal(bsize == mi_page_block_size(page)); - void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL); - - // initialize a randomized free list - // set up `slice_count` slices to alternate between - size_t shift = MI_MAX_SLICE_SHIFT; - while ((extend >> shift) == 0) { - shift--; - } - const size_t slice_count = (size_t)1U << shift; - const size_t slice_extend = extend / slice_count; - mi_assert_internal(slice_extend >= 1); - mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice - size_t counts[MI_MAX_SLICES]; // available objects in the slice - for (size_t i = 0; i < slice_count; i++) { - blocks[i] = mi_page_block_at(page, page_area, bsize, page->capacity + i*slice_extend); - counts[i] = slice_extend; - } - counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?) - - // and initialize the free list by randomly threading through them - // set up first element - const uintptr_t r = _mi_heap_random_next(heap); - size_t current = r % slice_count; - counts[current]--; - mi_block_t* const free_start = blocks[current]; - // and iterate through the rest; use `random_shuffle` for performance - uintptr_t rnd = _mi_random_shuffle(r|1); // ensure not 0 - for (size_t i = 1; i < extend; i++) { - // call random_shuffle only every INTPTR_SIZE rounds - const size_t round = i%MI_INTPTR_SIZE; - if (round == 0) rnd = _mi_random_shuffle(rnd); - // select a random next slice index - size_t next = ((rnd >> 8*round) & (slice_count-1)); - while (counts[next]==0) { // ensure it still has space - next++; - if (next==slice_count) next = 0; - } - // and link the current block to it - counts[next]--; - mi_block_t* const block = blocks[current]; - blocks[current] = (mi_block_t*)((uint8_t*)block + bsize); // bump to the following block - mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next` - current = next; - } - // prepend to the free list (usually NULL) - mi_block_set_next(page, blocks[current], page->free); // end of the list - page->free = free_start; -} - -static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) -{ - MI_UNUSED(stats); - #if (MI_SECURE <= 2) - mi_assert_internal(page->free == NULL); - mi_assert_internal(page->local_free == NULL); - #endif - mi_assert_internal(page->capacity + extend <= page->reserved); - mi_assert_internal(bsize == mi_page_block_size(page)); - void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); - - mi_block_t* const start = mi_page_block_at(page, page_area, bsize, page->capacity); - - // initialize a sequential free list - mi_block_t* const last = mi_page_block_at(page, page_area, bsize, page->capacity + extend - 1); - mi_block_t* block = start; - while(block <= last) { - mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize); - mi_block_set_next(page,block,next); - block = 
next; - } - // prepend to free list (usually `NULL`) - mi_block_set_next(page, last, page->free); - page->free = start; -} - -/* ----------------------------------------------------------- - Page initialize and extend the capacity ------------------------------------------------------------ */ - -#define MI_MAX_EXTEND_SIZE (4*1024) // heuristic, one OS page seems to work well. -#if (MI_SECURE>0) -#define MI_MIN_EXTEND (8*MI_SECURE) // extend at least by this many -#else -#define MI_MIN_EXTEND (4) -#endif - -// Extend the capacity (up to reserved) by initializing a free list -// We do at most `MI_MAX_EXTEND` to avoid touching too much memory -// Note: we also experimented with "bump" allocation on the first -// allocations but this did not speed up any benchmark (due to an -// extra test in malloc? or cache effects?) -static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { - MI_UNUSED(tld); - mi_assert_expensive(mi_page_is_valid_init(page)); - #if (MI_SECURE<=2) - mi_assert(page->free == NULL); - mi_assert(page->local_free == NULL); - if (page->free != NULL) return; - #endif - if (page->capacity >= page->reserved) return; - - size_t page_size; - _mi_page_start(_mi_page_segment(page), page, &page_size); - mi_stat_counter_increase(tld->stats.pages_extended, 1); - - // calculate the extend count - const size_t bsize = (page->xblock_size < MI_HUGE_BLOCK_SIZE ? page->xblock_size : page_size); - size_t extend = page->reserved - page->capacity; - mi_assert_internal(extend > 0); - - size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)bsize); - if (max_extend < MI_MIN_EXTEND) { max_extend = MI_MIN_EXTEND; } - mi_assert_internal(max_extend > 0); - - if (extend > max_extend) { - // ensure we don't touch memory beyond the page to reduce page commit. - // the `lean` benchmark tests this. Going from 1 to 8 increases rss by 50%. - extend = max_extend; - } - - mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved); - mi_assert_internal(extend < (1UL<<16)); - - // and append the extend the free list - if (extend < MI_MIN_SLICES || MI_SECURE==0) { //!mi_option_is_enabled(mi_option_secure)) { - mi_page_free_list_extend(page, bsize, extend, &tld->stats ); - } - else { - mi_page_free_list_extend_secure(heap, page, bsize, extend, &tld->stats); - } - // enable the new free list - page->capacity += (uint16_t)extend; - mi_stat_increase(tld->stats.page_committed, extend * bsize); - - // extension into zero initialized memory preserves the zero'd free list - if (!page->is_zero_init) { - page->is_zero = false; - } - mi_assert_expensive(mi_page_is_valid_init(page)); -} - -// Initialize a fresh page -static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_tld_t* tld) { - mi_assert(page != NULL); - mi_segment_t* segment = _mi_page_segment(page); - mi_assert(segment != NULL); - mi_assert_internal(block_size > 0); - // set fields - mi_page_set_heap(page, heap); - page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? 
(uint32_t)block_size : MI_HUGE_BLOCK_SIZE); // initialize before _mi_segment_page_start - size_t page_size; - const void* page_start = _mi_segment_page_start(segment, page, &page_size); - MI_UNUSED(page_start); - mi_track_mem_noaccess(page_start,page_size); - mi_assert_internal(mi_page_block_size(page) <= page_size); - mi_assert_internal(page_size <= page->slice_count*MI_SEGMENT_SLICE_SIZE); - mi_assert_internal(page_size / block_size < (1L<<16)); - page->reserved = (uint16_t)(page_size / block_size); - mi_assert_internal(page->reserved > 0); - #ifdef MI_ENCODE_FREELIST - page->keys[0] = _mi_heap_random_next(heap); - page->keys[1] = _mi_heap_random_next(heap); - #endif - #if MI_DEBUG > 0 - page->is_zero = false; // ensure in debug mode we initialize with MI_DEBUG_UNINIT, see issue #501 - #else - page->is_zero = page->is_zero_init; - #endif - - mi_assert_internal(page->is_committed); - mi_assert_internal(!page->is_reset); - mi_assert_internal(page->capacity == 0); - mi_assert_internal(page->free == NULL); - mi_assert_internal(page->used == 0); - mi_assert_internal(page->xthread_free == 0); - mi_assert_internal(page->next == NULL); - mi_assert_internal(page->prev == NULL); - mi_assert_internal(page->retire_expire == 0); - mi_assert_internal(!mi_page_has_aligned(page)); - #if (MI_ENCODE_FREELIST) - mi_assert_internal(page->keys[0] != 0); - mi_assert_internal(page->keys[1] != 0); - #endif - mi_assert_expensive(mi_page_is_valid_init(page)); - - // initialize an initial free list - mi_page_extend_free(heap,page,tld); - mi_assert(mi_page_immediate_available(page)); -} - - -/* ----------------------------------------------------------- - Find pages with free blocks --------------------------------------------------------------*/ - -// Find a page with free blocks of `page->block_size`. -static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try) -{ - // search through the pages in "next fit" order - size_t count = 0; - mi_page_t* page = pq->first; - while (page != NULL) - { - mi_page_t* next = page->next; // remember next - count++; - - // 0. collect freed blocks by us and other threads - _mi_page_free_collect(page, false); - - // 1. if the page contains free blocks, we are done - if (mi_page_immediate_available(page)) { - break; // pick this one - } - - // 2. Try to extend - if (page->capacity < page->reserved) { - mi_page_extend_free(heap, page, heap->tld); - mi_assert_internal(mi_page_immediate_available(page)); - break; - } - - // 3. If the page is completely full, move it to the `mi_pages_full` - // queue so we don't visit long-lived pages too often. - mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page)); - mi_page_to_full(page, pq); - - page = next; - } // for each page - - mi_heap_stat_counter_increase(heap, searches, count); - - if (page == NULL) { - _mi_heap_collect_retired(heap, false); // perhaps make a page available? - page = mi_page_fresh(heap, pq); - if (page == NULL && first_try) { - // out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again - page = mi_page_queue_find_free_ex(heap, pq, false); - } - } - else { - mi_assert(pq->first == page); - page->retire_expire = 0; - } - mi_assert_internal(page == NULL || mi_page_immediate_available(page)); - return page; -} - - - -// Find a page with free blocks of `size`. 
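The scan in `mi_page_queue_find_free_ex` above is a next-fit policy: reuse a page that already has an immediately free block, extend a page that still has reserved capacity, and park exhausted pages on the full queue so they are not rescanned. Below is a minimal standalone sketch of that policy only; the `demo_*` types and names are invented for illustration and omit all of the real bookkeeping.

```
#include <stddef.h>

// Illustrative types only; they are not the mimalloc structures.
typedef struct demo_page_s {
  struct demo_page_s* next;
  size_t free_blocks;   // blocks immediately available
  size_t capacity;      // blocks whose free list is initialized
  size_t reserved;      // total blocks backed by the page
} demo_page_t;

typedef struct demo_queue_s {
  demo_page_t* first;
} demo_queue_t;

// Next-fit scan: return a page that can serve an allocation,
// moving exhausted pages to `full` so later scans skip them.
demo_page_t* demo_find_free(demo_queue_t* q, demo_queue_t* full) {
  demo_page_t* page = q->first;
  while (page != NULL) {
    demo_page_t* next = page->next;          // remember; `page` may be unlinked
    if (page->free_blocks > 0) {
      return page;                           // 1. immediately usable
    }
    if (page->capacity < page->reserved) {   // 2. can still be extended
      page->capacity++;                      //    stand-in for extending the free list
      page->free_blocks++;
      return page;
    }
    // 3. completely full: unlink from the scan queue, push on the full list
    q->first = next;
    page->next = full->first;
    full->first = page;
    page = next;
  }
  return NULL;                               // caller falls back to a fresh page
}
```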
-static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { - mi_page_queue_t* pq = mi_page_queue(heap,size); - mi_page_t* page = pq->first; - if (page != NULL) { - #if (MI_SECURE>=3) // in secure mode, we extend half the time to increase randomness - if (page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) { - mi_page_extend_free(heap, page, heap->tld); - mi_assert_internal(mi_page_immediate_available(page)); - } - else - #endif - { - _mi_page_free_collect(page,false); - } - - if (mi_page_immediate_available(page)) { - page->retire_expire = 0; - return page; // fast path - } - } - return mi_page_queue_find_free_ex(heap, pq, true); -} - - -/* ----------------------------------------------------------- - Users can register a deferred free function called - when the `free` list is empty. Since the `local_free` - is separate this is deterministically called after - a certain number of allocations. ------------------------------------------------------------ */ - -static mi_deferred_free_fun* volatile deferred_free = NULL; -static _Atomic(void*) deferred_arg; // = NULL - -void _mi_deferred_free(mi_heap_t* heap, bool force) { - heap->tld->heartbeat++; - if (deferred_free != NULL && !heap->tld->recurse) { - heap->tld->recurse = true; - deferred_free(force, heap->tld->heartbeat, mi_atomic_load_ptr_relaxed(void,&deferred_arg)); - heap->tld->recurse = false; - } -} - -void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noexcept { - deferred_free = fn; - mi_atomic_store_ptr_release(void,&deferred_arg, arg); -} - - -/* ----------------------------------------------------------- - General allocation ------------------------------------------------------------ */ - -// Large and huge page allocation. -// Huge pages are allocated directly without being in a queue. -// Because huge pages contain just one block, and the segment contains -// just that page, we always treat them as abandoned and any thread -// that frees the block can free the whole page and segment directly. -// Huge pages are also use if the requested alignment is very large (> MI_ALIGNMENT_MAX). -static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) { - size_t block_size = _mi_os_good_alloc_size(size); - mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0); - bool is_huge = (block_size > MI_LARGE_OBJ_SIZE_MAX || page_alignment > 0); - #if MI_HUGE_PAGE_ABANDON - mi_page_queue_t* pq = (is_huge ? NULL : mi_page_queue(heap, block_size)); - #else - mi_page_queue_t* pq = mi_page_queue(heap, is_huge ? 
MI_HUGE_BLOCK_SIZE : block_size); // not block_size as that can be low if the page_alignment > 0 - mi_assert_internal(!is_huge || mi_page_queue_is_huge(pq)); - #endif - mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size, page_alignment); - if (page != NULL) { - mi_assert_internal(mi_page_immediate_available(page)); - - if (is_huge) { - mi_assert_internal(_mi_page_segment(page)->kind == MI_SEGMENT_HUGE); - mi_assert_internal(_mi_page_segment(page)->used==1); - #if MI_HUGE_PAGE_ABANDON - mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue - mi_page_set_heap(page, NULL); - #endif - } - else { - mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE); - } - - const size_t bsize = mi_page_usable_block_size(page); // note: not `mi_page_block_size` to account for padding - if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_increase(heap, large, bsize); - mi_heap_stat_counter_increase(heap, large_count, 1); - } - else { - mi_heap_stat_increase(heap, huge, bsize); - mi_heap_stat_counter_increase(heap, huge_count, 1); - } - } - return page; -} - - -// Allocate a page -// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed. -static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignment) mi_attr_noexcept { - // huge allocation? - const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` - if mi_unlikely(req_size > (MI_MEDIUM_OBJ_SIZE_MAX - MI_PADDING_SIZE) || huge_alignment > 0) { - if mi_unlikely(req_size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see ) - _mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size); - return NULL; - } - else { - return mi_large_huge_page_alloc(heap,size,huge_alignment); - } - } - else { - // otherwise find a page with free blocks in our size segregated queues - mi_assert_internal(size >= MI_PADDING_SIZE); - return mi_find_free_page(heap, size); - } -} - -// Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed. -// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed. -// The `huge_alignment` is normally 0 but is set to a multiple of MI_SEGMENT_SIZE for -// very large requested alignments in which case we use a huge segment. 
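`_mi_malloc_generic`, introduced by the comment above, finds (or allocates) a page, forces a heap collection if that fails, retries once, and only then reports out-of-memory. The sketch below shows just that retry-once shape under invented `demo_*` stand-ins; it is not the mimalloc routine itself.

```
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

// Placeholders for "find or allocate a suitable page" and "collect the heap";
// they are not mimalloc functions.
static void* demo_try_alloc(size_t size) { return malloc(size); }
static void  demo_collect(int force)     { (void)force; /* reclaim memory here */ }

// Slow path: on failure, force a collection and retry exactly once
// before reporting out-of-memory.
static void* demo_alloc_generic(size_t size) {
  void* p = demo_try_alloc(size);
  if (p == NULL) {
    demo_collect(1 /* force */);
    p = demo_try_alloc(size);
  }
  if (p == NULL) {
    errno = ENOMEM;
    fprintf(stderr, "unable to allocate %zu bytes\n", size);
  }
  return p;
}

int main(void) {
  free(demo_alloc_generic(64));
  return 0;
}
```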
-void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept -{ - mi_assert_internal(heap != NULL); - - // initialize if necessary - if mi_unlikely(!mi_heap_is_initialized(heap)) { - mi_thread_init(); // calls `_mi_heap_init` in turn - heap = mi_get_default_heap(); - if mi_unlikely(!mi_heap_is_initialized(heap)) { return NULL; } - } - mi_assert_internal(mi_heap_is_initialized(heap)); - - // call potential deferred free routines - _mi_deferred_free(heap, false); - - // free delayed frees from other threads (but skip contended ones) - _mi_heap_delayed_free_partial(heap); - - // find (or allocate) a page of the right size - mi_page_t* page = mi_find_page(heap, size, huge_alignment); - if mi_unlikely(page == NULL) { // first time out of memory, try to collect and retry the allocation once more - mi_heap_collect(heap, true /* force */); - page = mi_find_page(heap, size, huge_alignment); - } - - if mi_unlikely(page == NULL) { // out of memory - const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` - _mi_error_message(ENOMEM, "unable to allocate memory (%zu bytes)\n", req_size); - return NULL; - } - - mi_assert_internal(mi_page_immediate_available(page)); - mi_assert_internal(mi_page_block_size(page) >= size); - - // and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc) - if mi_unlikely(zero && page->xblock_size == 0) { - // note: we cannot call _mi_page_malloc with zeroing for huge blocks; we zero it afterwards in that case. - void* p = _mi_page_malloc(heap, page, size, false); - mi_assert_internal(p != NULL); - _mi_memzero_aligned(p, mi_page_usable_block_size(page)); - return p; - } - else { - return _mi_page_malloc(heap, page, size, zero); - } -} diff --git a/3rd/mimalloc-2.0.9/src/region.c b/3rd/mimalloc-2.0.9/src/region.c deleted file mode 100644 index 3571abb6..00000000 --- a/3rd/mimalloc-2.0.9/src/region.c +++ /dev/null @@ -1,516 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2019-2020, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- -This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..) -and the segment and huge object allocation by mimalloc. There may be multiple -implementations of this (one could be the identity going directly to the OS, -another could be a simple cache etc), but the current one uses large "regions". -In contrast to the rest of mimalloc, the "regions" are shared between threads and -need to be accessed using atomic operations. -We need this memory layer between the raw OS calls because of: -1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order - to reuse memory effectively. -2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of - an OS allocation/free is still (much) too expensive relative to the accesses - in that object :-( (`malloc-large` tests this). This means we need a cheaper - way to reuse memory. -3. This layer allows for NUMA aware allocation. 
- -Possible issues: -- (2) can potentially be addressed too with a small cache per thread which is much - simpler. Generally though that requires shrinking of huge pages, and may overuse - memory per thread. (and is not compatible with `sbrk`). -- Since the current regions are per-process, we need atomic operations to - claim blocks which may be contended -- In the worst case, we need to search the whole region map (16KiB for 256GiB) - linearly. At what point will direct OS calls be faster? Is there a way to - do this better without adding too much complexity? ------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" - -#include // memset - -#include "bitmap.h" - -// Internal raw OS interface -size_t _mi_os_large_page_size(void); -bool _mi_os_protect(void* addr, size_t size); -bool _mi_os_unprotect(void* addr, size_t size); -bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats); - -// arena.c -mi_arena_id_t _mi_arena_id_none(void); -void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats); -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); - - - -// Constants -#if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * MI_GiB) // 64KiB for the region map -#elif (MI_INTPTR_SIZE==4) -#define MI_HEAP_REGION_MAX_SIZE (3 * MI_GiB) // ~ KiB for the region map -#else -#error "define the maximum heap space allowed for regions on this platform" -#endif - -#define MI_REGION_MAX_BLOCKS MI_BITMAP_FIELD_BITS -#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits) -#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits) -#define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB -#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE) - -// Region info -typedef union mi_region_info_u { - size_t value; - struct { - bool valid; // initialized? - bool is_large:1; // allocated in fixed large/huge OS pages - bool is_pinned:1; // pinned memory cannot be decommitted - short numa_node; // the associated NUMA node (where -1 means no associated node) - } x; -} mi_region_info_t; - - -// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with -// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. 
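As the comment above explains, a region tracks its 4MiB blocks with one bit per block, and threads claim blocks through atomic operations on the shared bitmap. The self-contained sketch below shows only the core compare-and-swap claim over a single 64-bit field; the `demo_*` names are invented and this is deliberately much simpler than the real bitmap code, so it should not be read as `_mi_bitmap_try_find_claim_field` itself.

```
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// One simplified bitmap field: each set bit marks an in-use block of a
// shared region. Invented for illustration; not the mimalloc bitmap code.
typedef _Atomic uint64_t demo_bitmap_field_t;

// Try to atomically claim `count` contiguous free (zero) bits.
// On success the bits are set and *bit_idx receives the start index.
static bool demo_bitmap_try_claim(demo_bitmap_field_t* field, int count, int* bit_idx) {
  uint64_t map = atomic_load_explicit(field, memory_order_relaxed);
  for (int idx = 0; idx + count <= 64; idx++) {
    const uint64_t mask =
      ((count == 64) ? ~UINT64_C(0) : ((UINT64_C(1) << count) - 1)) << idx;
    if ((map & mask) == 0) {                 // found a free run at `idx`
      if (atomic_compare_exchange_weak_explicit(field, &map, map | mask,
            memory_order_acq_rel, memory_order_relaxed)) {
        *bit_idx = idx;
        return true;
      }
      idx = -1;  // CAS failed: `map` now holds the fresh value, restart the scan
    }
  }
  return false;  // no free run of `count` bits in this field
}

int main(void) {
  demo_bitmap_field_t field = 0;
  int idx = 0;
  if (demo_bitmap_try_claim(&field, 4, &idx)) {
    printf("claimed 4 blocks starting at bit %d\n", idx);
  }
  return 0;
}
```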
-typedef struct mem_region_s { - _Atomic(size_t) info; // mi_region_info_t.value - _Atomic(void*) start; // start of the memory area - mi_bitmap_field_t in_use; // bit per in-use block - mi_bitmap_field_t dirty; // track if non-zero per block - mi_bitmap_field_t commit; // track if committed per block - mi_bitmap_field_t reset; // track if reset per block - _Atomic(size_t) arena_memid; // if allocated from a (huge page) arena - _Atomic(size_t) padding; // round to 8 fields (needs to be atomic for msvc, see issue #508) -} mem_region_t; - -// The region map -static mem_region_t regions[MI_REGION_MAX]; - -// Allocated regions -static _Atomic(size_t) regions_count; // = 0; - - -/* ---------------------------------------------------------------------------- -Utility functions ------------------------------------------------------------------------------*/ - -// Blocks (of 4MiB) needed for the given size. -static size_t mi_region_block_count(size_t size) { - return _mi_divide_up(size, MI_SEGMENT_SIZE); -} - -/* -// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. -static size_t mi_good_commit_size(size_t size) { - if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; - return _mi_align_up(size, _mi_os_large_page_size()); -} -*/ - -// Return if a pointer points into a region reserved by us. -mi_decl_nodiscard bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - if (p==NULL) return false; - size_t count = mi_atomic_load_relaxed(®ions_count); - for (size_t i = 0; i < count; i++) { - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_relaxed(uint8_t, ®ions[i].start); - if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; - } - return false; -} - - -static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) { - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start); - mi_assert_internal(start != NULL); - return (start + (bit_idx * MI_SEGMENT_SIZE)); -} - -static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) { - mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS); - size_t idx = region - regions; - mi_assert_internal(®ions[idx] == region); - return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1; -} - -static size_t mi_memid_create_from_arena(size_t arena_memid) { - return (arena_memid << 1) | 1; -} - - -static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) { - if ((id&1)==1) { - if (arena_memid != NULL) *arena_memid = (id>>1); - return true; - } - else { - size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS; - *bit_idx = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS; - *region = ®ions[idx]; - return false; - } -} - - -/* ---------------------------------------------------------------------------- - Allocate a region is allocated from the OS (or an arena) ------------------------------------------------------------------------------*/ - -static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) -{ - // not out of regions yet? 
- if (mi_atomic_load_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false; - - // try to allocate a fresh region from the OS - bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit)); - bool region_large = (commit && allow_large); - bool is_zero = false; - bool is_pinned = false; - size_t arena_memid = 0; - void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, ®ion_commit, ®ion_large, &is_pinned, &is_zero, _mi_arena_id_none(), & arena_memid, tld); - if (start == NULL) return false; - mi_assert_internal(!(region_large && !allow_large)); - mi_assert_internal(!region_large || region_commit); - - // claim a fresh slot - const size_t idx = mi_atomic_increment_acq_rel(®ions_count); - if (idx >= MI_REGION_MAX) { - mi_atomic_decrement_acq_rel(®ions_count); - _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, region_commit, tld->stats); - _mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, MI_GiB)); - return false; - } - - // allocated, initialize and claim the initial blocks - mem_region_t* r = ®ions[idx]; - r->arena_memid = arena_memid; - mi_atomic_store_release(&r->in_use, (size_t)0); - mi_atomic_store_release(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL)); - mi_atomic_store_release(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0)); - mi_atomic_store_release(&r->reset, (size_t)0); - *bit_idx = 0; - _mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); - mi_atomic_store_ptr_release(void,&r->start, start); - - // and share it - mi_region_info_t info; - info.value = 0; // initialize the full union to zero - info.x.valid = true; - info.x.is_large = region_large; - info.x.is_pinned = is_pinned; - info.x.numa_node = (short)_mi_os_numa_node(tld); - mi_atomic_store_release(&r->info, info.value); // now make it available to others - *region = r; - return true; -} - -/* ---------------------------------------------------------------------------- - Try to claim blocks in suitable regions ------------------------------------------------------------------------------*/ - -static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) { - // initialized at all? - mi_region_info_t info; - info.value = mi_atomic_load_relaxed(&((mem_region_t*)region)->info); - if (info.value==0) return false; - - // numa correct - if (numa_node >= 0) { // use negative numa node to always succeed - int rnode = info.x.numa_node; - if (rnode >= 0 && rnode != numa_node) return false; - } - - // check allow-large - if (!allow_large && info.x.is_large) return false; - - return true; -} - - -static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) -{ - // try all regions for a free slot - const size_t count = mi_atomic_load_relaxed(®ions_count); // monotonic, so ok to be relaxed - size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? 
Starting at 0 seems to increase latency though - for (size_t visited = 0; visited < count; visited++, idx++) { - if (idx >= count) idx = 0; // wrap around - mem_region_t* r = ®ions[idx]; - // if this region suits our demand (numa node matches, large OS page matches) - if (mi_region_is_suitable(r, numa_node, allow_large)) { - // then try to atomically claim a segment(s) in this region - if (_mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) { - tld->region_idx = idx; // remember the last found position - *region = r; - return true; - } - } - } - return false; -} - - -static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) -{ - mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS); - mem_region_t* region; - mi_bitmap_index_t bit_idx; - const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); - // try to claim in existing regions - if (!mi_region_try_claim(numa_node, blocks, *large, ®ion, &bit_idx, tld)) { - // otherwise try to allocate a fresh region and claim in there - if (!mi_region_try_alloc_os(blocks, *commit, *large, ®ion, &bit_idx, tld)) { - // out of regions or memory - return NULL; - } - } - - // ------------------------------------------------ - // found a region and claimed `blocks` at `bit_idx`, initialize them now - mi_assert_internal(region != NULL); - mi_assert_internal(_mi_bitmap_is_claimed(®ion->in_use, 1, blocks, bit_idx)); - - mi_region_info_t info; - info.value = mi_atomic_load_acquire(®ion->info); - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ion->start); - mi_assert_internal(!(info.x.is_large && !*large)); - mi_assert_internal(start != NULL); - - *is_zero = _mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL); - *large = info.x.is_large; - *is_pinned = info.x.is_pinned; - *memid = mi_memid_create(region, bit_idx); - void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); - - // commit - if (*commit) { - // ensure commit - bool any_uncommitted; - _mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_uncommitted); - if (any_uncommitted) { - mi_assert_internal(!info.x.is_large && !info.x.is_pinned); - bool commit_zero = false; - if (!_mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld)) { - // failed to commit! 
unclaim and return - mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); - return NULL; - } - if (commit_zero) *is_zero = true; - } - } - else { - // no need to commit, but check if already fully committed - *commit = _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx); - } - mi_assert_internal(!*commit || _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx)); - - // unreset reset blocks - if (_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { - // some blocks are still reset - mi_assert_internal(!info.x.is_large && !info.x.is_pinned); - mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); - mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); - if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed - bool reset_zero = false; - _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); - if (reset_zero) *is_zero = true; - } - } - mi_assert_internal(!_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)); - - #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED - if (*commit) { ((uint8_t*)p)[0] = 0; } - #endif - - // and return the allocation - mi_assert_internal(p != NULL); - return p; -} - - -/* ---------------------------------------------------------------------------- - Allocation ------------------------------------------------------------------------------*/ - -// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. -// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) -{ - mi_assert_internal(memid != NULL && tld != NULL); - mi_assert_internal(size > 0); - *memid = 0; - *is_zero = false; - *is_pinned = false; - bool default_large = false; - if (large==NULL) large = &default_large; // ensure `large != NULL` - if (size == 0) return NULL; - size = _mi_align_up(size, _mi_os_page_size()); - - // allocate from regions if possible - void* p = NULL; - size_t arena_memid; - const size_t blocks = mi_region_block_count(size); - if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { - p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld); - if (p == NULL) { - _mi_warning_message("unable to allocate from region: size %zu\n", size); - } - } - if (p == NULL) { - // and otherwise fall back to the OS - p = _mi_arena_alloc_aligned(size, alignment, align_offset, commit, large, is_pinned, is_zero, _mi_arena_id_none(), & arena_memid, tld); - *memid = mi_memid_create_from_arena(arena_memid); - } - - if (p != NULL) { - mi_assert_internal(((uintptr_t)p + align_offset) % alignment == 0); - #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED - if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed - #endif - } - return p; -} - - - -/* ---------------------------------------------------------------------------- -Free ------------------------------------------------------------------------------*/ - -// Free previously allocated memory with a given id. 
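The `id` decoded just below (and produced earlier by `mi_memid_create` and `mi_memid_create_from_arena`) is a small tagged integer: the low bit says whether the memory came straight from an arena, and the remaining bits carry either the arena memid or the region block index. A self-contained sketch of that encoding, with invented `demo_*` names:

```
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

// Tag-in-the-low-bit encoding of a memory id in one size_t:
//   bit 0 == 0 -> region allocation, payload is the block index
//   bit 0 == 1 -> arena allocation,  payload is the arena memid
// The demo_* names are invented for this sketch.
static size_t demo_memid_from_region(size_t block_index) { return block_index << 1; }
static size_t demo_memid_from_arena (size_t arena_memid) { return (arena_memid << 1) | 1; }

static bool demo_memid_is_arena(size_t id, size_t* payload) {
  *payload = id >> 1;
  return (id & 1) == 1;
}

int main(void) {
  size_t payload = 0;
  assert(!demo_memid_is_arena(demo_memid_from_region(42), &payload) && payload == 42);
  assert( demo_memid_is_arena(demo_memid_from_arena(7),   &payload) && payload == 7);
  printf("memid round-trip ok\n");
  return 0;
}
```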
-void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) { - mi_assert_internal(size > 0 && tld != NULL); - if (p==NULL) return; - if (size==0) return; - size = _mi_align_up(size, _mi_os_page_size()); - - size_t arena_memid = 0; - mi_bitmap_index_t bit_idx; - mem_region_t* region; - if (mi_memid_is_arena(id,®ion,&bit_idx,&arena_memid)) { - // was a direct arena allocation, pass through - _mi_arena_free(p, size, alignment, align_offset, arena_memid, full_commit, tld->stats); - } - else { - // allocated in a region - mi_assert_internal(align_offset == 0); - mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; - const size_t blocks = mi_region_block_count(size); - mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS); - mi_region_info_t info; - info.value = mi_atomic_load_acquire(®ion->info); - mi_assert_internal(info.value != 0); - void* blocks_start = mi_region_blocks_start(region, bit_idx); - mi_assert_internal(blocks_start == p); // not a pointer in our area? - mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS); - if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? - - // committed? - if (full_commit && (size % MI_SEGMENT_SIZE) == 0) { - _mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, NULL); - } - - if (any_reset) { - // set the is_reset bits if any pages were reset - _mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, NULL); - } - - // reset the blocks to reduce the working set. - if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset) - && (mi_option_is_enabled(mi_option_eager_commit) || - mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead - { - bool any_unreset; - _mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, &any_unreset); - if (any_unreset) { - _mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit) - _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld); - } - } - - // and unclaim - bool all_unclaimed = mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); - mi_assert_internal(all_unclaimed); MI_UNUSED(all_unclaimed); - } -} - - -/* ---------------------------------------------------------------------------- - collection ------------------------------------------------------------------------------*/ -void _mi_mem_collect(mi_os_tld_t* tld) { - // free every region that has no segments in use. 
- size_t rcount = mi_atomic_load_relaxed(®ions_count); - for (size_t i = 0; i < rcount; i++) { - mem_region_t* region = ®ions[i]; - if (mi_atomic_load_relaxed(®ion->info) != 0) { - // if no segments used, try to claim the whole region - size_t m = mi_atomic_load_relaxed(®ion->in_use); - while (m == 0 && !mi_atomic_cas_weak_release(®ion->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ }; - if (m == 0) { - // on success, free the whole region - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ions[i].start); - size_t arena_memid = mi_atomic_load_relaxed(®ions[i].arena_memid); - size_t commit = mi_atomic_load_relaxed(®ions[i].commit); - memset((void*)®ions[i], 0, sizeof(mem_region_t)); // cast to void* to avoid atomic warning - // and release the whole region - mi_atomic_store_release(®ion->info, (size_t)0); - if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - _mi_abandoned_await_readers(); // ensure no pending reads - _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, (~commit == 0), tld->stats); - } - } - } - } -} - - -/* ---------------------------------------------------------------------------- - Other ------------------------------------------------------------------------------*/ - -bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return _mi_os_decommit(p, size, tld->stats); - } - else { - return _mi_os_reset(p, size, tld->stats); - } -} - -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return _mi_os_commit(p, size, is_zero, tld->stats); - } - else { - return _mi_os_unreset(p, size, is_zero, tld->stats); - } -} - -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - return _mi_os_commit(p, size, is_zero, tld->stats); -} - -bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { - return _mi_os_decommit(p, size, tld->stats); -} - -bool _mi_mem_protect(void* p, size_t size) { - return _mi_os_protect(p, size); -} - -bool _mi_mem_unprotect(void* p, size_t size) { - return _mi_os_unprotect(p, size); -} diff --git a/3rd/mimalloc-2.0.9/src/segment-cache.c b/3rd/mimalloc-2.0.9/src/segment-cache.c deleted file mode 100644 index d93fd644..00000000 --- a/3rd/mimalloc-2.0.9/src/segment-cache.c +++ /dev/null @@ -1,409 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2020, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- - Implements a cache of segments to avoid expensive OS calls and to reuse - the commit_mask to optimize the commit/decommit calls. - The full memory map of all segments is also implemented here. 
------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" - -#include "bitmap.h" // atomic bitmap - -//#define MI_CACHE_DISABLE 1 // define to completely disable the segment cache - -#define MI_CACHE_FIELDS (16) -#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit - -#define BITS_SET() MI_ATOMIC_VAR_INIT(UINTPTR_MAX) -#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) // note: update if MI_CACHE_FIELDS changes - -typedef struct mi_cache_slot_s { - void* p; - size_t memid; - bool is_pinned; - mi_commit_mask_t commit_mask; - mi_commit_mask_t decommit_mask; - _Atomic(mi_msecs_t) expire; -} mi_cache_slot_t; - -static mi_decl_cache_align mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 - -static mi_decl_cache_align mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! -static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; -static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free - -static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void* arg) { - mi_arena_id_t req_arena_id = *((mi_arena_id_t*)arg); - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - return _mi_arena_memid_is_suitable(slot->memid, req_arena_id); -} - -mi_decl_noinline static void* mi_segment_cache_pop_ex( - bool all_suitable, - size_t size, mi_commit_mask_t* commit_mask, - mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, - mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) -{ -#ifdef MI_CACHE_DISABLE - return NULL; -#else - - // only segment blocks - if (size != MI_SEGMENT_SIZE) return NULL; - - // numa node determines start field - const int numa_node = _mi_os_numa_node(tld); - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - - // find an available slot - mi_bitmap_index_t bitidx = 0; - bool claimed = false; - mi_arena_id_t req_arena_id = _req_arena_id; - mi_bitmap_pred_fun_t pred_fun = (all_suitable ? NULL : &mi_segment_cache_is_suitable); // cannot pass NULL as the arena may be exclusive itself; todo: do not put exclusive arenas in the cache? - - if (*large) { // large allowed? 
- claimed = _mi_bitmap_try_find_from_claim_pred(cache_available_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); - if (claimed) *large = true; - } - if (!claimed) { - claimed = _mi_bitmap_try_find_from_claim_pred (cache_available, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); - if (claimed) *large = false; - } - - if (!claimed) return NULL; - - // found a slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - void* p = slot->p; - *memid = slot->memid; - *is_pinned = slot->is_pinned; - *is_zero = false; - *commit_mask = slot->commit_mask; - *decommit_mask = slot->decommit_mask; - slot->p = NULL; - mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); - - // mark the slot as free again - mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); - _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); - return p; -#endif -} - - -mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) -{ - return mi_segment_cache_pop_ex(false, size, commit_mask, decommit_mask, large, is_pinned, is_zero, _req_arena_id, memid, tld); -} - -static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) -{ - if (mi_commit_mask_is_empty(cmask)) { - // nothing - } - else if (mi_commit_mask_is_full(cmask)) { - _mi_os_decommit(p, total, stats); - } - else { - // todo: one call to decommit the whole at once? - mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); - size_t part = total/MI_COMMIT_MASK_BITS; - size_t idx; - size_t count; - mi_commit_mask_foreach(cmask, idx, count) { - void* start = (uint8_t*)p + (idx*part); - size_t size = count*part; - _mi_os_decommit(start, size, stats); - } - mi_commit_mask_foreach_end() - } - mi_commit_mask_create_empty(cmask); -} - -#define MI_MAX_PURGE_PER_PUSH (4) - -static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, mi_os_tld_t* tld) -{ - MI_UNUSED(tld); - if (!mi_option_is_enabled(mi_option_allow_decommit)) return; - mi_msecs_t now = _mi_clock_now(); - size_t purged = 0; - const size_t max_visits = (visit_all ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); - size_t idx = (visit_all ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ ); - for (size_t visited = 0; visited < max_visits; visited++,idx++) { // visit N slots - if (idx >= MI_CACHE_MAX) idx = 0; // wrap - mi_cache_slot_t* slot = &cache[idx]; - mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire); - if (expire != 0 && (force || now >= expire)) { // racy read - // seems expired, first claim it from available - purged++; - mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); - if (_mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { - // was available, we claimed it - expire = mi_atomic_loadi64_acquire(&slot->expire); - if (expire != 0 && (force || now >= expire)) { // safe read - // still expired, decommit it - mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); - _mi_abandoned_await_readers(); // wait until safe to decommit - // decommit committed parts - // TODO: instead of decommit, we could also free to the OS? 
- mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); - mi_commit_mask_create_empty(&slot->decommit_mask); - } - _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop - } - if (!visit_all && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push - } - } -} - -void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) { - if (force) { - // called on `mi_collect(true)` but not on thread termination - _mi_segment_cache_free_all(tld); - } - else { - mi_segment_cache_purge(true /* visit all */, false /* don't force unexpired */, tld); - } -} - -void _mi_segment_cache_free_all(mi_os_tld_t* tld) { - mi_commit_mask_t commit_mask; - mi_commit_mask_t decommit_mask; - bool is_pinned; - bool is_zero; - size_t memid; - const size_t size = MI_SEGMENT_SIZE; - // iterate twice: first large pages, then regular memory - for (int i = 0; i < 2; i++) { - void* p; - do { - // keep popping and freeing the memory - bool large = (i == 0); - p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &decommit_mask, - &large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, tld); - if (p != NULL) { - size_t csize = _mi_commit_mask_committed_size(&commit_mask, size); - if (csize > 0 && !is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); - _mi_arena_free(p, size, MI_SEGMENT_ALIGN, 0, memid, is_pinned /* pretend not committed to not double count decommits */, tld->stats); - } - } while (p != NULL); - } -} - -mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) -{ -#ifdef MI_CACHE_DISABLE - return false; -#else - - // only for normal segment blocks - if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; - - // numa node determines start field - int numa_node = _mi_os_numa_node(NULL); - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - - // purge expired entries - mi_segment_cache_purge(false /* limit purges to a constant N */, false /* don't force unexpired */, tld); - - // find an available slot - mi_bitmap_index_t bitidx; - bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); - if (!claimed) return false; - - mi_assert_internal(_mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); -#if MI_DEBUG>1 - if (is_pinned || is_large) { - mi_assert_internal(mi_commit_mask_is_full(commit_mask)); - } -#endif - - // set the slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - slot->p = start; - slot->memid = memid; - slot->is_pinned = is_pinned; - mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - slot->commit_mask = *commit_mask; - slot->decommit_mask = *decommit_mask; - if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_decommit)) { - long delay = mi_option_get(mi_option_segment_decommit_delay); - if (delay == 0) { - _mi_abandoned_await_readers(); // wait until safe to decommit - mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); - mi_commit_mask_create_empty(&slot->decommit_mask); - } - else { - 
mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); - } - } - - // make it available - _mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); - return true; -#endif -} - - -/* ----------------------------------------------------------- - The following functions are to reliably find the segment or - block that encompasses any pointer p (or NULL if it is not - in any of our segments). - We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB) - set to 1 if it contains the segment meta data. ------------------------------------------------------------ */ - - -#if (MI_INTPTR_SIZE==8) -#define MI_MAX_ADDRESS ((size_t)20 << 40) // 20TB -#else -#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb -#endif - -#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE) -#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) -#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) - -static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments - -static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { - mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE? - if ((uintptr_t)segment >= MI_MAX_ADDRESS) { - *bitidx = 0; - return MI_SEGMENT_MAP_WSIZE; - } - else { - const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE; - *bitidx = segindex % MI_INTPTR_BITS; - const size_t mapindex = segindex / MI_INTPTR_BITS; - mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE); - return mapindex; - } -} - -void _mi_segment_map_allocated_at(const mi_segment_t* segment) { - size_t bitidx; - size_t index = mi_segment_map_index_of(segment, &bitidx); - mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); - if (index==MI_SEGMENT_MAP_WSIZE) return; - uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); - uintptr_t newmask; - do { - newmask = (mask | ((uintptr_t)1 << bitidx)); - } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); -} - -void _mi_segment_map_freed_at(const mi_segment_t* segment) { - size_t bitidx; - size_t index = mi_segment_map_index_of(segment, &bitidx); - mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); - if (index == MI_SEGMENT_MAP_WSIZE) return; - uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); - uintptr_t newmask; - do { - newmask = (mask & ~((uintptr_t)1 << bitidx)); - } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); -} - -// Determine the segment belonging to a pointer or NULL if it is not in a valid segment. -static mi_segment_t* _mi_segment_of(const void* p) { - if (p == NULL) return NULL; - mi_segment_t* segment = _mi_ptr_segment(p); - mi_assert_internal(segment != NULL); - size_t bitidx; - size_t index = mi_segment_map_index_of(segment, &bitidx); - // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge - const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); - if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) { - return segment; // yes, allocated by us - } - if (index==MI_SEGMENT_MAP_WSIZE) return NULL; - - // TODO: maintain max/min allocated range for efficiency for more efficient rejection of invalid pointers? 
- - // search downwards for the first segment in case it is an interior pointer - // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps trough - // valid huge objects - // note: we could maintain a lowest index to speed up the path for invalid pointers? - size_t lobitidx; - size_t loindex; - uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1); - if (lobits != 0) { - loindex = index; - lobitidx = mi_bsr(lobits); // lobits != 0 - } - else if (index == 0) { - return NULL; - } - else { - mi_assert_internal(index > 0); - uintptr_t lomask = mask; - loindex = index; - do { - loindex--; - lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]); - } while (lomask != 0 && loindex > 0); - if (lomask == 0) return NULL; - lobitidx = mi_bsr(lomask); // lomask != 0 - } - mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE); - // take difference as the addresses could be larger than the MAX_ADDRESS space. - size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE; - segment = (mi_segment_t*)((uint8_t*)segment - diff); - - if (segment == NULL) return NULL; - mi_assert_internal((void*)segment < p); - bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie); - mi_assert_internal(cookie_ok); - if mi_unlikely(!cookie_ok) return NULL; - if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range - mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment)); - return segment; -} - -// Is this a valid pointer in our heap? -static bool mi_is_valid_pointer(const void* p) { - return (_mi_segment_of(p) != NULL); -} - -mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - return mi_is_valid_pointer(p); -} - -/* -// Return the full segment range belonging to a pointer -static void* mi_segment_range_of(const void* p, size_t* size) { - mi_segment_t* segment = _mi_segment_of(p); - if (segment == NULL) { - if (size != NULL) *size = 0; - return NULL; - } - else { - if (size != NULL) *size = segment->segment_size; - return segment; - } - mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); - mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size); - mi_reset_delayed(tld); - mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); - return page; -} -*/ diff --git a/3rd/mimalloc-2.0.9/src/segment.c b/3rd/mimalloc-2.0.9/src/segment.c deleted file mode 100644 index dc98e3e7..00000000 --- a/3rd/mimalloc-2.0.9/src/segment.c +++ /dev/null @@ -1,1623 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2018-2020, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. 
------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" - -#include // memset -#include - -#define MI_PAGE_HUGE_ALIGN (256*1024) - -static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats); - - -// ------------------------------------------------------------------- -// commit mask -// ------------------------------------------------------------------- - -static bool mi_commit_mask_all_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { - for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { - if ((commit->mask[i] & cm->mask[i]) != cm->mask[i]) return false; - } - return true; -} - -static bool mi_commit_mask_any_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { - for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { - if ((commit->mask[i] & cm->mask[i]) != 0) return true; - } - return false; -} - -static void mi_commit_mask_create_intersect(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm, mi_commit_mask_t* res) { - for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { - res->mask[i] = (commit->mask[i] & cm->mask[i]); - } -} - -static void mi_commit_mask_clear(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { - for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { - res->mask[i] &= ~(cm->mask[i]); - } -} - -static void mi_commit_mask_set(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { - for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { - res->mask[i] |= cm->mask[i]; - } -} - -static void mi_commit_mask_create(size_t bitidx, size_t bitcount, mi_commit_mask_t* cm) { - mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); - mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); - if (bitcount == MI_COMMIT_MASK_BITS) { - mi_assert_internal(bitidx==0); - mi_commit_mask_create_full(cm); - } - else if (bitcount == 0) { - mi_commit_mask_create_empty(cm); - } - else { - mi_commit_mask_create_empty(cm); - size_t i = bitidx / MI_COMMIT_MASK_FIELD_BITS; - size_t ofs = bitidx % MI_COMMIT_MASK_FIELD_BITS; - while (bitcount > 0) { - mi_assert_internal(i < MI_COMMIT_MASK_FIELD_COUNT); - size_t avail = MI_COMMIT_MASK_FIELD_BITS - ofs; - size_t count = (bitcount > avail ? avail : bitcount); - size_t mask = (count >= MI_COMMIT_MASK_FIELD_BITS ? 
~((size_t)0) : (((size_t)1 << count) - 1) << ofs); - cm->mask[i] = mask; - bitcount -= count; - ofs = 0; - i++; - } - } -} - -size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total) { - mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); - size_t count = 0; - for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { - size_t mask = cm->mask[i]; - if (~mask == 0) { - count += MI_COMMIT_MASK_FIELD_BITS; - } - else { - for (; mask != 0; mask >>= 1) { // todo: use popcount - if ((mask&1)!=0) count++; - } - } - } - // we use total since for huge segments each commit bit may represent a larger size - return ((total / MI_COMMIT_MASK_BITS) * count); -} - - -size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx) { - size_t i = (*idx) / MI_COMMIT_MASK_FIELD_BITS; - size_t ofs = (*idx) % MI_COMMIT_MASK_FIELD_BITS; - size_t mask = 0; - // find first ones - while (i < MI_COMMIT_MASK_FIELD_COUNT) { - mask = cm->mask[i]; - mask >>= ofs; - if (mask != 0) { - while ((mask&1) == 0) { - mask >>= 1; - ofs++; - } - break; - } - i++; - ofs = 0; - } - if (i >= MI_COMMIT_MASK_FIELD_COUNT) { - // not found - *idx = MI_COMMIT_MASK_BITS; - return 0; - } - else { - // found, count ones - size_t count = 0; - *idx = (i*MI_COMMIT_MASK_FIELD_BITS) + ofs; - do { - mi_assert_internal(ofs < MI_COMMIT_MASK_FIELD_BITS && (mask&1) == 1); - do { - count++; - mask >>= 1; - } while ((mask&1) == 1); - if ((((*idx + count) % MI_COMMIT_MASK_FIELD_BITS) == 0)) { - i++; - if (i >= MI_COMMIT_MASK_FIELD_COUNT) break; - mask = cm->mask[i]; - ofs = 0; - } - } while ((mask&1) == 1); - mi_assert_internal(count > 0); - return count; - } -} - - -/* -------------------------------------------------------------------------------- - Segment allocation - - If a thread ends, it "abandons" pages with used blocks - and there is an abandoned segment list whose segments can - be reclaimed by still running threads, much like work-stealing. 
--------------------------------------------------------------------------------- */ - - -/* ----------------------------------------------------------- - Slices ------------------------------------------------------------ */ - - -static const mi_slice_t* mi_segment_slices_end(const mi_segment_t* segment) { - return &segment->slices[segment->slice_entries]; -} - -static uint8_t* mi_slice_start(const mi_slice_t* slice) { - mi_segment_t* segment = _mi_ptr_segment(slice); - mi_assert_internal(slice >= segment->slices && slice < mi_segment_slices_end(segment)); - return ((uint8_t*)segment + ((slice - segment->slices)*MI_SEGMENT_SLICE_SIZE)); -} - - -/* ----------------------------------------------------------- - Bins ------------------------------------------------------------ */ -// Use bit scan forward to quickly find the first zero bit if it is available - -static inline size_t mi_slice_bin8(size_t slice_count) { - if (slice_count<=1) return slice_count; - mi_assert_internal(slice_count <= MI_SLICES_PER_SEGMENT); - slice_count--; - size_t s = mi_bsr(slice_count); // slice_count > 1 - if (s <= 2) return slice_count + 1; - size_t bin = ((s << 2) | ((slice_count >> (s - 2))&0x03)) - 4; - return bin; -} - -static inline size_t mi_slice_bin(size_t slice_count) { - mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_SEGMENT_SIZE); - mi_assert_internal(mi_slice_bin8(MI_SLICES_PER_SEGMENT) <= MI_SEGMENT_BIN_MAX); - size_t bin = mi_slice_bin8(slice_count); - mi_assert_internal(bin <= MI_SEGMENT_BIN_MAX); - return bin; -} - -static inline size_t mi_slice_index(const mi_slice_t* slice) { - mi_segment_t* segment = _mi_ptr_segment(slice); - ptrdiff_t index = slice - segment->slices; - mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_entries); - return index; -} - - -/* ----------------------------------------------------------- - Slice span queues ------------------------------------------------------------ */ - -static void mi_span_queue_push(mi_span_queue_t* sq, mi_slice_t* slice) { - // todo: or push to the end? 
- mi_assert_internal(slice->prev == NULL && slice->next==NULL); - slice->prev = NULL; // paranoia - slice->next = sq->first; - sq->first = slice; - if (slice->next != NULL) slice->next->prev = slice; - else sq->last = slice; - slice->xblock_size = 0; // free -} - -static mi_span_queue_t* mi_span_queue_for(size_t slice_count, mi_segments_tld_t* tld) { - size_t bin = mi_slice_bin(slice_count); - mi_span_queue_t* sq = &tld->spans[bin]; - mi_assert_internal(sq->slice_count >= slice_count); - return sq; -} - -static void mi_span_queue_delete(mi_span_queue_t* sq, mi_slice_t* slice) { - mi_assert_internal(slice->xblock_size==0 && slice->slice_count>0 && slice->slice_offset==0); - // should work too if the queue does not contain slice (which can happen during reclaim) - if (slice->prev != NULL) slice->prev->next = slice->next; - if (slice == sq->first) sq->first = slice->next; - if (slice->next != NULL) slice->next->prev = slice->prev; - if (slice == sq->last) sq->last = slice->prev; - slice->prev = NULL; - slice->next = NULL; - slice->xblock_size = 1; // no more free -} - - -/* ----------------------------------------------------------- - Invariant checking ------------------------------------------------------------ */ - -static bool mi_slice_is_used(const mi_slice_t* slice) { - return (slice->xblock_size > 0); -} - - -#if (MI_DEBUG>=3) -static bool mi_span_queue_contains(mi_span_queue_t* sq, mi_slice_t* slice) { - for (mi_slice_t* s = sq->first; s != NULL; s = s->next) { - if (s==slice) return true; - } - return false; -} - -static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_assert_internal(segment != NULL); - mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); - mi_assert_internal(segment->abandoned <= segment->used); - mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id()); - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); // can only decommit committed blocks - //mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0); - mi_slice_t* slice = &segment->slices[0]; - const mi_slice_t* end = mi_segment_slices_end(segment); - size_t used_count = 0; - mi_span_queue_t* sq; - while(slice < end) { - mi_assert_internal(slice->slice_count > 0); - mi_assert_internal(slice->slice_offset == 0); - size_t index = mi_slice_index(slice); - size_t maxindex = (index + slice->slice_count >= segment->slice_entries ? 
segment->slice_entries : index + slice->slice_count) - 1; - if (mi_slice_is_used(slice)) { // a page in use, we need at least MAX_SLICE_OFFSET valid back offsets - used_count++; - for (size_t i = 0; i <= MI_MAX_SLICE_OFFSET && index + i <= maxindex; i++) { - mi_assert_internal(segment->slices[index + i].slice_offset == i*sizeof(mi_slice_t)); - mi_assert_internal(i==0 || segment->slices[index + i].slice_count == 0); - mi_assert_internal(i==0 || segment->slices[index + i].xblock_size == 1); - } - // and the last entry as well (for coalescing) - const mi_slice_t* last = slice + slice->slice_count - 1; - if (last > slice && last < mi_segment_slices_end(segment)) { - mi_assert_internal(last->slice_offset == (slice->slice_count-1)*sizeof(mi_slice_t)); - mi_assert_internal(last->slice_count == 0); - mi_assert_internal(last->xblock_size == 1); - } - } - else { // free range of slices; only last slice needs a valid back offset - mi_slice_t* last = &segment->slices[maxindex]; - if (segment->kind != MI_SEGMENT_HUGE || slice->slice_count <= (segment->slice_entries - segment->segment_info_slices)) { - mi_assert_internal((uint8_t*)slice == (uint8_t*)last - last->slice_offset); - } - mi_assert_internal(slice == last || last->slice_count == 0 ); - mi_assert_internal(last->xblock_size == 0 || (segment->kind==MI_SEGMENT_HUGE && last->xblock_size==1)); - if (segment->kind != MI_SEGMENT_HUGE && segment->thread_id != 0) { // segment is not huge or abandoned - sq = mi_span_queue_for(slice->slice_count,tld); - mi_assert_internal(mi_span_queue_contains(sq,slice)); - } - } - slice = &segment->slices[maxindex+1]; - } - mi_assert_internal(slice == end); - mi_assert_internal(used_count == segment->used + 1); - return true; -} -#endif - -/* ----------------------------------------------------------- - Segment size calculations ------------------------------------------------------------ */ - -static size_t mi_segment_info_size(mi_segment_t* segment) { - return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE; -} - -static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t xblock_size, size_t* page_size) -{ - ptrdiff_t idx = slice - segment->slices; - size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE; - // make the start not OS page aligned for smaller blocks to avoid page/cache effects - size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? 
3*MI_MAX_ALIGN_GUARANTEE : 0); - if (page_size != NULL) { *page_size = psize - start_offset; } - return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset); -} - -// Start of the page available memory; can be used on uninitialized pages -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) -{ - const mi_slice_t* slice = mi_page_to_slice((mi_page_t*)page); - uint8_t* p = _mi_segment_page_start_from_slice(segment, slice, page->xblock_size, page_size); - mi_assert_internal(page->xblock_size > 0 || _mi_ptr_page(p) == page); - mi_assert_internal(_mi_ptr_segment(p) == segment); - return p; -} - - -static size_t mi_segment_calculate_slices(size_t required, size_t* pre_size, size_t* info_slices) { - size_t page_size = _mi_os_page_size(); - size_t isize = _mi_align_up(sizeof(mi_segment_t), page_size); - size_t guardsize = 0; - - if (MI_SECURE>0) { - // in secure mode, we set up a protected page in between the segment info - // and the page data (and one at the end of the segment) - guardsize = page_size; - if (required > 0) { - required = _mi_align_up(required, MI_SEGMENT_SLICE_SIZE) + page_size; - } - } - - if (pre_size != NULL) *pre_size = isize; - isize = _mi_align_up(isize + guardsize, MI_SEGMENT_SLICE_SIZE); - if (info_slices != NULL) *info_slices = isize / MI_SEGMENT_SLICE_SIZE; - size_t segment_size = (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + guardsize, MI_SEGMENT_SLICE_SIZE) ); - mi_assert_internal(segment_size % MI_SEGMENT_SLICE_SIZE == 0); - return (segment_size / MI_SEGMENT_SLICE_SIZE); -} - - -/* ---------------------------------------------------------------------------- -Segment caches -We keep a small segment cache per thread to increase local -reuse and avoid setting/clearing guard pages in secure mode. -------------------------------------------------------------------------------- */ - -static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { - if (segment_size>=0) _mi_stat_increase(&tld->stats->segments,1); - else _mi_stat_decrease(&tld->stats->segments,1); - tld->count += (segment_size >= 0 ? 1 : -1); - if (tld->count > tld->peak_count) tld->peak_count = tld->count; - tld->current_size += segment_size; - if (tld->current_size > tld->peak_size) tld->peak_size = tld->current_size; -} - -static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { - segment->thread_id = 0; - _mi_segment_map_freed_at(segment); - mi_segments_track_size(-((long)mi_segment_size(segment)),tld); - if (MI_SECURE>0) { - // _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set - // unprotect the guard pages; we cannot just unprotect the whole segment size as part may be decommitted - size_t os_pagesize = _mi_os_page_size(); - _mi_os_unprotect((uint8_t*)segment + mi_segment_info_size(segment) - os_pagesize, os_pagesize); - uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_pagesize; - _mi_os_unprotect(end, os_pagesize); - } - - // purge delayed decommits now? 
(no, leave it to the cache) - // mi_segment_delayed_decommit(segment,true,tld->stats); - - // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); - const size_t size = mi_segment_size(segment); - if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE || // only push regular segments on the cache - !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) - { - const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); - if (csize > 0 && !segment->mem_is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); - _mi_abandoned_await_readers(); // wait until safe to free - _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, segment->mem_is_pinned /* pretend not committed to not double count decommits */, tld->stats); - } -} - -// called by threads that are terminating -void _mi_segment_thread_collect(mi_segments_tld_t* tld) { - MI_UNUSED(tld); - // nothing to do -} - - -/* ----------------------------------------------------------- - Commit/Decommit ranges ------------------------------------------------------------ */ - -static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, size_t size, uint8_t** start_p, size_t* full_size, mi_commit_mask_t* cm) { - mi_assert_internal(_mi_ptr_segment(p + 1) == segment); - mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); - mi_commit_mask_create_empty(cm); - if (size == 0 || size > MI_SEGMENT_SIZE || segment->kind == MI_SEGMENT_HUGE) return; - const size_t segstart = mi_segment_info_size(segment); - const size_t segsize = mi_segment_size(segment); - if (p >= (uint8_t*)segment + segsize) return; - - size_t pstart = (p - (uint8_t*)segment); - mi_assert_internal(pstart + size <= segsize); - - size_t start; - size_t end; - if (conservative) { - // decommit conservative - start = _mi_align_up(pstart, MI_COMMIT_SIZE); - end = _mi_align_down(pstart + size, MI_COMMIT_SIZE); - mi_assert_internal(start >= segstart); - mi_assert_internal(end <= segsize); - } - else { - // commit liberal - start = _mi_align_down(pstart, MI_MINIMAL_COMMIT_SIZE); - end = _mi_align_up(pstart + size, MI_MINIMAL_COMMIT_SIZE); - } - if (pstart >= segstart && start < segstart) { // note: the mask is also calculated for an initial commit of the info area - start = segstart; - } - if (end > segsize) { - end = segsize; - } - - mi_assert_internal(start <= pstart && (pstart + size) <= end); - mi_assert_internal(start % MI_COMMIT_SIZE==0 && end % MI_COMMIT_SIZE == 0); - *start_p = (uint8_t*)segment + start; - *full_size = (end > start ? 
end - start : 0); - if (*full_size == 0) return; - - size_t bitidx = start / MI_COMMIT_SIZE; - mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); - - size_t bitcount = *full_size / MI_COMMIT_SIZE; // can be 0 - if (bitidx + bitcount > MI_COMMIT_MASK_BITS) { - _mi_warning_message("commit mask overflow: idx=%zu count=%zu start=%zx end=%zx p=0x%p size=%zu fullsize=%zu\n", bitidx, bitcount, start, end, p, size, *full_size); - } - mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); - mi_commit_mask_create(bitidx, bitcount, cm); -} - - -static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); - - // commit liberal, but decommit conservative - uint8_t* start = NULL; - size_t full_size = 0; - mi_commit_mask_t mask; - mi_segment_commit_mask(segment, !commit/*conservative*/, p, size, &start, &full_size, &mask); - if (mi_commit_mask_is_empty(&mask) || full_size==0) return true; - - if (commit && !mi_commit_mask_all_set(&segment->commit_mask, &mask)) { - bool is_zero = false; - mi_commit_mask_t cmask; - mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); - _mi_stat_decrease(&_mi_stats_main.committed, _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap - if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false; - mi_commit_mask_set(&segment->commit_mask, &mask); - } - else if (!commit && mi_commit_mask_any_set(&segment->commit_mask, &mask)) { - mi_assert_internal((void*)start != (void*)segment); - //mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &mask)); - - mi_commit_mask_t cmask; - mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); - _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap - if (segment->allow_decommit) { - _mi_os_decommit(start, full_size, stats); // ok if this fails - } - mi_commit_mask_clear(&segment->commit_mask, &mask); - } - // increase expiration of reusing part of the delayed decommit - if (commit && mi_commit_mask_any_set(&segment->decommit_mask, &mask)) { - segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); - } - // always undo delayed decommits - mi_commit_mask_clear(&segment->decommit_mask, &mask); - return true; -} - -static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); - // note: assumes commit_mask is always full for huge segments as otherwise the commit mask bits can overflow - if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->decommit_mask)) return true; // fully committed - return mi_segment_commitx(segment,true,p,size,stats); -} - -static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - if (!segment->allow_decommit) return; - if (mi_option_get(mi_option_decommit_delay) == 0) { - mi_segment_commitx(segment, false, p, size, stats); - } - else { - // register for future decommit in the decommit mask - uint8_t* start = NULL; - size_t full_size = 0; - mi_commit_mask_t mask; - mi_segment_commit_mask(segment, true /*conservative*/, p, size, &start, &full_size, &mask); - if (mi_commit_mask_is_empty(&mask) || full_size==0) return; - - // update delayed commit - 
mi_assert_internal(segment->decommit_expire > 0 || mi_commit_mask_is_empty(&segment->decommit_mask)); - mi_commit_mask_t cmask; - mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only decommit what is committed; span_free may try to decommit more - mi_commit_mask_set(&segment->decommit_mask, &cmask); - mi_msecs_t now = _mi_clock_now(); - if (segment->decommit_expire == 0) { - // no previous decommits, initialize now - segment->decommit_expire = now + mi_option_get(mi_option_decommit_delay); - } - else if (segment->decommit_expire <= now) { - // previous decommit mask already expired - if (segment->decommit_expire + mi_option_get(mi_option_decommit_extend_delay) <= now) { - mi_segment_delayed_decommit(segment, true, stats); - } - else { - segment->decommit_expire = now + mi_option_get(mi_option_decommit_extend_delay); // (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's - } - } - else { - // previous decommit mask is not yet expired, increase the expiration by a bit. - segment->decommit_expire += mi_option_get(mi_option_decommit_extend_delay); - } - } -} - -static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats) { - if (!segment->allow_decommit || mi_commit_mask_is_empty(&segment->decommit_mask)) return; - mi_msecs_t now = _mi_clock_now(); - if (!force && now < segment->decommit_expire) return; - - mi_commit_mask_t mask = segment->decommit_mask; - segment->decommit_expire = 0; - mi_commit_mask_create_empty(&segment->decommit_mask); - - size_t idx; - size_t count; - mi_commit_mask_foreach(&mask, idx, count) { - // if found, decommit that sequence - if (count > 0) { - uint8_t* p = (uint8_t*)segment + (idx*MI_COMMIT_SIZE); - size_t size = count * MI_COMMIT_SIZE; - mi_segment_commitx(segment, false, p, size, stats); - } - } - mi_commit_mask_foreach_end() - mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); -} - - -/* ----------------------------------------------------------- - Span free ------------------------------------------------------------ */ - -static bool mi_segment_is_abandoned(mi_segment_t* segment) { - return (segment->thread_id == 0); -} - -// note: can be called on abandoned segments -static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_decommit, mi_segments_tld_t* tld) { - mi_assert_internal(slice_index < segment->slice_entries); - mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE || mi_segment_is_abandoned(segment) - ? NULL : mi_span_queue_for(slice_count,tld)); - if (slice_count==0) slice_count = 1; - mi_assert_internal(slice_index + slice_count - 1 < segment->slice_entries); - - // set first and last slice (the intermediates can be undetermined) - mi_slice_t* slice = &segment->slices[slice_index]; - slice->slice_count = (uint32_t)slice_count; - mi_assert_internal(slice->slice_count == slice_count); // no overflow? 
- slice->slice_offset = 0; - if (slice_count > 1) { - mi_slice_t* last = &segment->slices[slice_index + slice_count - 1]; - last->slice_count = 0; - last->slice_offset = (uint32_t)(sizeof(mi_page_t)*(slice_count - 1)); - last->xblock_size = 0; - } - - // perhaps decommit - if (allow_decommit) { - mi_segment_perhaps_decommit(segment, mi_slice_start(slice), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); - } - - // and push it on the free page queue (if it was not a huge page) - if (sq != NULL) mi_span_queue_push( sq, slice ); - else slice->xblock_size = 0; // mark huge page as free anyways -} - -/* -// called from reclaim to add existing free spans -static void mi_segment_span_add_free(mi_slice_t* slice, mi_segments_tld_t* tld) { - mi_segment_t* segment = _mi_ptr_segment(slice); - mi_assert_internal(slice->xblock_size==0 && slice->slice_count>0 && slice->slice_offset==0); - size_t slice_index = mi_slice_index(slice); - mi_segment_span_free(segment,slice_index,slice->slice_count,tld); -} -*/ - -static void mi_segment_span_remove_from_queue(mi_slice_t* slice, mi_segments_tld_t* tld) { - mi_assert_internal(slice->slice_count > 0 && slice->slice_offset==0 && slice->xblock_size==0); - mi_assert_internal(_mi_ptr_segment(slice)->kind != MI_SEGMENT_HUGE); - mi_span_queue_t* sq = mi_span_queue_for(slice->slice_count, tld); - mi_span_queue_delete(sq, slice); -} - -// note: can be called on abandoned segments -static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_tld_t* tld) { - mi_assert_internal(slice != NULL && slice->slice_count > 0 && slice->slice_offset == 0); - mi_segment_t* segment = _mi_ptr_segment(slice); - bool is_abandoned = mi_segment_is_abandoned(segment); - - // for huge pages, just mark as free but don't add to the queues - if (segment->kind == MI_SEGMENT_HUGE) { - mi_assert_internal(segment->used == 1); // decreased right after this call in `mi_segment_page_clear` - slice->xblock_size = 0; // mark as free anyways - // we should mark the last slice `xblock_size=0` now to maintain invariants but we skip it to - // avoid a possible cache miss (and the segment is about to be freed) - return slice; - } - - // otherwise coalesce the span and add to the free span queues - size_t slice_count = slice->slice_count; - mi_slice_t* next = slice + slice->slice_count; - mi_assert_internal(next <= mi_segment_slices_end(segment)); - if (next < mi_segment_slices_end(segment) && next->xblock_size==0) { - // free next block -- remove it from free and merge - mi_assert_internal(next->slice_count > 0 && next->slice_offset==0); - slice_count += next->slice_count; // extend - if (!is_abandoned) { mi_segment_span_remove_from_queue(next, tld); } - } - if (slice > segment->slices) { - mi_slice_t* prev = mi_slice_first(slice - 1); - mi_assert_internal(prev >= segment->slices); - if (prev->xblock_size==0) { - // free previous slice -- remove it from free and merge - mi_assert_internal(prev->slice_count > 0 && prev->slice_offset==0); - slice_count += prev->slice_count; - if (!is_abandoned) { mi_segment_span_remove_from_queue(prev, tld); } - slice = prev; - } - } - - // and add the new free page - mi_segment_span_free(segment, mi_slice_index(slice), slice_count, true, tld); - return slice; -} - - - -/* ----------------------------------------------------------- - Page allocation ------------------------------------------------------------ */ - -// Note: may still return NULL if committing the memory failed -static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t 
slice_index, size_t slice_count, mi_segments_tld_t* tld) { - mi_assert_internal(slice_index < segment->slice_entries); - mi_slice_t* const slice = &segment->slices[slice_index]; - mi_assert_internal(slice->xblock_size==0 || slice->xblock_size==1); - - // commit before changing the slice data - if (!mi_segment_ensure_committed(segment, _mi_segment_page_start_from_slice(segment, slice, 0, NULL), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats)) { - return NULL; // commit failed! - } - - // convert the slices to a page - slice->slice_offset = 0; - slice->slice_count = (uint32_t)slice_count; - mi_assert_internal(slice->slice_count == slice_count); - const size_t bsize = slice_count * MI_SEGMENT_SLICE_SIZE; - slice->xblock_size = (uint32_t)(bsize >= MI_HUGE_BLOCK_SIZE ? MI_HUGE_BLOCK_SIZE : bsize); - mi_page_t* page = mi_slice_to_page(slice); - mi_assert_internal(mi_page_block_size(page) == bsize); - - // set slice back pointers for the first MI_MAX_SLICE_OFFSET entries - size_t extra = slice_count-1; - if (extra > MI_MAX_SLICE_OFFSET) extra = MI_MAX_SLICE_OFFSET; - if (slice_index + extra >= segment->slice_entries) extra = segment->slice_entries - slice_index - 1; // huge objects may have more slices than avaiable entries in the segment->slices - - mi_slice_t* slice_next = slice + 1; - for (size_t i = 1; i <= extra; i++, slice_next++) { - slice_next->slice_offset = (uint32_t)(sizeof(mi_slice_t)*i); - slice_next->slice_count = 0; - slice_next->xblock_size = 1; - } - - // and also for the last one (if not set already) (the last one is needed for coalescing and for large alignments) - // note: the cast is needed for ubsan since the index can be larger than MI_SLICES_PER_SEGMENT for huge allocations (see #543) - mi_slice_t* last = slice + slice_count - 1; - mi_slice_t* end = (mi_slice_t*)mi_segment_slices_end(segment); - if (last > end) last = end; - if (last > slice) { - last->slice_offset = (uint32_t)(sizeof(mi_slice_t) * (last - slice)); - last->slice_count = 0; - last->xblock_size = 1; - } - - // and initialize the page - page->is_reset = false; - page->is_committed = true; - segment->used++; - return page; -} - -static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, mi_segments_tld_t* tld) { - mi_assert_internal(_mi_ptr_segment(slice) == segment); - mi_assert_internal(slice->slice_count >= slice_count); - mi_assert_internal(slice->xblock_size > 0); // no more in free queue - if (slice->slice_count <= slice_count) return; - mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); - size_t next_index = mi_slice_index(slice) + slice_count; - size_t next_count = slice->slice_count - slice_count; - mi_segment_span_free(segment, next_index, next_count, false /* don't decommit left-over part */, tld); - slice->slice_count = (uint32_t)slice_count; -} - -static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld) { - mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_LARGE_OBJ_SIZE_MAX); - // search from best fit up - mi_span_queue_t* sq = mi_span_queue_for(slice_count, tld); - if (slice_count == 0) slice_count = 1; - while (sq <= &tld->spans[MI_SEGMENT_BIN_MAX]) { - for (mi_slice_t* slice = sq->first; slice != NULL; slice = slice->next) { - if (slice->slice_count >= slice_count) { - // found one - mi_segment_t* segment = _mi_ptr_segment(slice); - if (_mi_arena_memid_is_suitable(segment->memid, req_arena_id)) { - // found a suitable page span - mi_span_queue_delete(sq, slice); - - if 
(slice->slice_count > slice_count) { - mi_segment_slice_split(segment, slice, slice_count, tld); - } - mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->xblock_size > 0); - mi_page_t* page = mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld); - if (page == NULL) { - // commit failed; return NULL but first restore the slice - mi_segment_span_free_coalesce(slice, tld); - return NULL; - } - return page; - } - } - } - sq++; - } - // could not find a page.. - return NULL; -} - - -/* ----------------------------------------------------------- - Segment allocation ------------------------------------------------------------ */ - -static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, bool eager_delay, mi_arena_id_t req_arena_id, - size_t* psegment_slices, size_t* ppre_size, size_t* pinfo_slices, - mi_commit_mask_t* pcommit_mask, mi_commit_mask_t* pdecommit_mask, - bool* is_zero, bool* pcommit, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) - -{ - // Allocate the segment from the OS - bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy - bool is_pinned = false; - size_t memid = 0; - size_t align_offset = 0; - size_t alignment = MI_SEGMENT_ALIGN; - - if (page_alignment > 0) { - // mi_assert_internal(huge_page != NULL); - mi_assert_internal(page_alignment >= MI_SEGMENT_ALIGN); - alignment = page_alignment; - const size_t info_size = (*pinfo_slices) * MI_SEGMENT_SLICE_SIZE; - align_offset = _mi_align_up( info_size, MI_SEGMENT_ALIGN ); - const size_t extra = align_offset - info_size; - // recalculate due to potential guard pages - *psegment_slices = mi_segment_calculate_slices(required + extra, ppre_size, pinfo_slices); - //segment_size += _mi_align_up(align_offset - info_size, MI_SEGMENT_SLICE_SIZE); - //segment_slices = segment_size / MI_SEGMENT_SLICE_SIZE; - } - const size_t segment_size = (*psegment_slices) * MI_SEGMENT_SLICE_SIZE; - mi_segment_t* segment = NULL; - - // get from cache? 
- if (page_alignment == 0) { - segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, pdecommit_mask, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); - } - - // get from OS - if (segment==NULL) { - segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, alignment, align_offset, pcommit, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); - if (segment == NULL) return NULL; // failed to allocate - if (*pcommit) { - mi_commit_mask_create_full(pcommit_mask); - } - else { - mi_commit_mask_create_empty(pcommit_mask); - } - } - mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - - const size_t commit_needed = _mi_divide_up((*pinfo_slices)*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); - mi_assert_internal(commit_needed>0); - mi_commit_mask_t commit_needed_mask; - mi_commit_mask_create(0, commit_needed, &commit_needed_mask); - if (!mi_commit_mask_all_set(pcommit_mask, &commit_needed_mask)) { - // at least commit the info slices - mi_assert_internal(commit_needed*MI_COMMIT_SIZE >= (*pinfo_slices)*MI_SEGMENT_SLICE_SIZE); - bool ok = _mi_os_commit(segment, commit_needed*MI_COMMIT_SIZE, is_zero, tld->stats); - if (!ok) return NULL; // failed to commit - mi_commit_mask_set(pcommit_mask, &commit_needed_mask); - } - mi_track_mem_undefined(segment,commit_needed); - segment->memid = memid; - segment->mem_is_pinned = is_pinned; - segment->mem_is_large = mem_large; - segment->mem_is_committed = mi_commit_mask_is_full(pcommit_mask); - segment->mem_alignment = alignment; - segment->mem_align_offset = align_offset; - mi_segments_track_size((long)(segment_size), tld); - _mi_segment_map_allocated_at(segment); - return segment; -} - - -// Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . -static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) -{ - mi_assert_internal((required==0 && huge_page==NULL) || (required>0 && huge_page != NULL)); - - // calculate needed sizes first - size_t info_slices; - size_t pre_size; - size_t segment_slices = mi_segment_calculate_slices(required, &pre_size, &info_slices); - - // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little) - const bool eager_delay = (// !_mi_os_has_overcommit() && // never delay on overcommit systems - _mi_current_thread_count() > 1 && // do not delay for the first N threads - tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); - const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); - bool commit = eager || (required > 0); - bool is_zero = false; - - mi_commit_mask_t commit_mask; - mi_commit_mask_t decommit_mask; - mi_commit_mask_create_empty(&commit_mask); - mi_commit_mask_create_empty(&decommit_mask); - - // Allocate the segment from the OS - mi_segment_t* segment = mi_segment_os_alloc(required, page_alignment, eager_delay, req_arena_id, - &segment_slices, &pre_size, &info_slices, &commit_mask, &decommit_mask, - &is_zero, &commit, tld, os_tld); - if (segment == NULL) return NULL; - - // zero the segment info? 
-- not always needed as it may be zero initialized from the OS - mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan - if (!is_zero) { - ptrdiff_t ofs = offsetof(mi_segment_t, next); - size_t prefix = offsetof(mi_segment_t, slices) - ofs; - memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*(segment_slices+1)); // one more - } - - segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed - segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); - if (segment->allow_decommit) { - segment->decommit_expire = 0; // don't decommit just committed memory // _mi_clock_now() + mi_option_get(mi_option_decommit_delay); - segment->decommit_mask = decommit_mask; - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); - #if MI_DEBUG>2 - const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); - mi_commit_mask_t commit_needed_mask; - mi_commit_mask_create(0, commit_needed, &commit_needed_mask); - mi_assert_internal(!mi_commit_mask_any_set(&segment->decommit_mask, &commit_needed_mask)); - #endif - } - - // initialize segment info - const size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? MI_SLICES_PER_SEGMENT : segment_slices); - segment->segment_slices = segment_slices; - segment->segment_info_slices = info_slices; - segment->thread_id = _mi_thread_id(); - segment->cookie = _mi_ptr_cookie(segment); - segment->slice_entries = slice_entries; - segment->kind = (required == 0 ? MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE); - - // memset(segment->slices, 0, sizeof(mi_slice_t)*(info_slices+1)); - _mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment)); - - // set up guard pages - size_t guard_slices = 0; - if (MI_SECURE>0) { - // in secure mode, we set up a protected page in between the segment info - // and the page data, and at the end of the segment. 
- size_t os_pagesize = _mi_os_page_size(); - mi_assert_internal(mi_segment_info_size(segment) - os_pagesize >= pre_size); - _mi_os_protect((uint8_t*)segment + mi_segment_info_size(segment) - os_pagesize, os_pagesize); - uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_pagesize; - mi_segment_ensure_committed(segment, end, os_pagesize, tld->stats); - _mi_os_protect(end, os_pagesize); - if (slice_entries == segment_slices) segment->slice_entries--; // don't use the last slice :-( - guard_slices = 1; - } - - // reserve first slices for segment info - mi_page_t* page0 = mi_segment_span_allocate(segment, 0, info_slices, tld); - mi_assert_internal(page0!=NULL); if (page0==NULL) return NULL; // cannot fail as we always commit in advance - mi_assert_internal(segment->used == 1); - segment->used = 0; // don't count our internal slices towards usage - - // initialize initial free pages - if (segment->kind == MI_SEGMENT_NORMAL) { // not a huge page - mi_assert_internal(huge_page==NULL); - mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, false /* don't decommit */, tld); - } - else { - mi_assert_internal(huge_page!=NULL); - mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); - mi_assert_internal(mi_commit_mask_is_full(&segment->commit_mask)); - *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices - guard_slices, tld); - mi_assert_internal(*huge_page != NULL); // cannot fail as we commit in advance - } - - mi_assert_expensive(mi_segment_is_valid(segment,tld)); - return segment; -} - - -static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { - MI_UNUSED(force); - mi_assert_internal(segment != NULL); - mi_assert_internal(segment->next == NULL); - mi_assert_internal(segment->used == 0); - - // Remove the free pages - mi_slice_t* slice = &segment->slices[0]; - const mi_slice_t* end = mi_segment_slices_end(segment); - size_t page_count = 0; - while (slice < end) { - mi_assert_internal(slice->slice_count > 0); - mi_assert_internal(slice->slice_offset == 0); - mi_assert_internal(mi_slice_index(slice)==0 || slice->xblock_size == 0); // no more used pages .. - if (slice->xblock_size == 0 && segment->kind != MI_SEGMENT_HUGE) { - mi_segment_span_remove_from_queue(slice, tld); - } - page_count++; - slice = slice + slice->slice_count; - } - mi_assert_internal(page_count == 2); // first page is allocated by the segment itself - - // stats - _mi_stat_decrease(&tld->stats->page_committed, mi_segment_info_size(segment)); - - // return it to the OS - mi_segment_os_free(segment, tld); -} - - -/* ----------------------------------------------------------- - Page Free ------------------------------------------------------------ */ - -static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); - -// note: can be called on abandoned pages -static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) { - mi_assert_internal(page->xblock_size > 0); - mi_assert_internal(mi_page_all_free(page)); - mi_segment_t* segment = _mi_ptr_segment(page); - mi_assert_internal(segment->used > 0); - - size_t inuse = page->capacity * mi_page_block_size(page); - _mi_stat_decrease(&tld->stats->page_committed, inuse); - _mi_stat_decrease(&tld->stats->pages, 1); - - // reset the page memory to reduce memory pressure? 
- if (!segment->mem_is_pinned && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { - size_t psize; - uint8_t* start = _mi_page_start(segment, page, &psize); - page->is_reset = true; - _mi_os_reset(start, psize, tld->stats); - } - - // zero the page data, but not the segment fields - page->is_zero_init = false; - ptrdiff_t ofs = offsetof(mi_page_t, capacity); - memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); - page->xblock_size = 1; - - // and free it - mi_slice_t* slice = mi_segment_span_free_coalesce(mi_page_to_slice(page), tld); - segment->used--; - // cannot assert segment valid as it is called during reclaim - // mi_assert_expensive(mi_segment_is_valid(segment, tld)); - return slice; -} - -void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) -{ - mi_assert(page != NULL); - - mi_segment_t* segment = _mi_page_segment(page); - mi_assert_expensive(mi_segment_is_valid(segment,tld)); - - // mark it as free now - mi_segment_page_clear(page, tld); - mi_assert_expensive(mi_segment_is_valid(segment, tld)); - - if (segment->used == 0) { - // no more used pages; remove from the free list and free the segment - mi_segment_free(segment, force, tld); - } - else if (segment->used == segment->abandoned) { - // only abandoned pages; remove from free list and abandon - mi_segment_abandon(segment,tld); - } -} - - -/* ----------------------------------------------------------- -Abandonment - -When threads terminate, they can leave segments with -live blocks (reachable through other threads). Such segments -are "abandoned" and will be reclaimed by other threads to -reuse their pages and/or free them eventually - -We maintain a global list of abandoned segments that are -reclaimed on demand. Since this is shared among threads -the implementation needs to avoid the A-B-A problem on -popping abandoned segments: -We use tagged pointers to avoid accidentially identifying -reused segments, much like stamped references in Java. -Secondly, we maintain a reader counter to avoid resetting -or decommitting segments that have a pending read operation. - -Note: the current implementation is one possible design; -another way might be to keep track of abandoned segments -in the arenas/segment_cache's. This would have the advantage of keeping -all concurrent code in one place and not needing to deal -with ABA issues. The drawback is that it is unclear how to -scan abandoned segments efficiently in that case as they -would be spread among all other segments in the arenas. ------------------------------------------------------------ */ - -// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers -// to put in a tag that increments on update to avoid the A-B-A problem. -#define MI_TAGGED_MASK MI_SEGMENT_MASK -typedef uintptr_t mi_tagged_segment_t; - -static mi_segment_t* mi_tagged_segment_ptr(mi_tagged_segment_t ts) { - return (mi_segment_t*)(ts & ~MI_TAGGED_MASK); -} - -static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_segment_t ts) { - mi_assert_internal(((uintptr_t)segment & MI_TAGGED_MASK) == 0); - uintptr_t tag = ((ts & MI_TAGGED_MASK) + 1) & MI_TAGGED_MASK; - return ((uintptr_t)segment | tag); -} - -// This is a list of visited abandoned pages that were full at the time. -// this list migrates to `abandoned` when that becomes NULL. The use of -// this list reduces contention and the rate at which segments are visited. 
-static mi_decl_cache_align _Atomic(mi_segment_t*) abandoned_visited; // = NULL - -// The abandoned page list (tagged as it supports pop) -static mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned; // = NULL - -// Maintain these for debug purposes (these counts may be a bit off) -static mi_decl_cache_align _Atomic(size_t) abandoned_count; -static mi_decl_cache_align _Atomic(size_t) abandoned_visited_count; - -// We also maintain a count of current readers of the abandoned list -// in order to prevent resetting/decommitting segment memory if it might -// still be read. -static mi_decl_cache_align _Atomic(size_t) abandoned_readers; // = 0 - -// Push on the visited list -static void mi_abandoned_visited_push(mi_segment_t* segment) { - mi_assert_internal(segment->thread_id == 0); - mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t,&segment->abandoned_next) == NULL); - mi_assert_internal(segment->next == NULL); - mi_assert_internal(segment->used > 0); - mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited); - do { - mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, anext); - } while (!mi_atomic_cas_ptr_weak_release(mi_segment_t, &abandoned_visited, &anext, segment)); - mi_atomic_increment_relaxed(&abandoned_visited_count); -} - -// Move the visited list to the abandoned list. -static bool mi_abandoned_visited_revisit(void) -{ - // quick check if the visited list is empty - if (mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited) == NULL) return false; - - // grab the whole visited list - mi_segment_t* first = mi_atomic_exchange_ptr_acq_rel(mi_segment_t, &abandoned_visited, NULL); - if (first == NULL) return false; - - // first try to swap directly if the abandoned list happens to be NULL - mi_tagged_segment_t afirst; - mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); - if (mi_tagged_segment_ptr(ts)==NULL) { - size_t count = mi_atomic_load_relaxed(&abandoned_visited_count); - afirst = mi_tagged_segment(first, ts); - if (mi_atomic_cas_strong_acq_rel(&abandoned, &ts, afirst)) { - mi_atomic_add_relaxed(&abandoned_count, count); - mi_atomic_sub_relaxed(&abandoned_visited_count, count); - return true; - } - } - - // find the last element of the visited list: O(n) - mi_segment_t* last = first; - mi_segment_t* next; - while ((next = mi_atomic_load_ptr_relaxed(mi_segment_t, &last->abandoned_next)) != NULL) { - last = next; - } - - // and atomically prepend to the abandoned list - // (no need to increase the readers as we don't access the abandoned segments) - mi_tagged_segment_t anext = mi_atomic_load_relaxed(&abandoned); - size_t count; - do { - count = mi_atomic_load_relaxed(&abandoned_visited_count); - mi_atomic_store_ptr_release(mi_segment_t, &last->abandoned_next, mi_tagged_segment_ptr(anext)); - afirst = mi_tagged_segment(first, anext); - } while (!mi_atomic_cas_weak_release(&abandoned, &anext, afirst)); - mi_atomic_add_relaxed(&abandoned_count, count); - mi_atomic_sub_relaxed(&abandoned_visited_count, count); - return true; -} - -// Push on the abandoned list. 
-static void mi_abandoned_push(mi_segment_t* segment) { - mi_assert_internal(segment->thread_id == 0); - mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); - mi_assert_internal(segment->next == NULL); - mi_assert_internal(segment->used > 0); - mi_tagged_segment_t next; - mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); - do { - mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, mi_tagged_segment_ptr(ts)); - next = mi_tagged_segment(segment, ts); - } while (!mi_atomic_cas_weak_release(&abandoned, &ts, next)); - mi_atomic_increment_relaxed(&abandoned_count); -} - -// Wait until there are no more pending reads on segments that used to be in the abandoned list -// called for example from `arena.c` before decommitting -void _mi_abandoned_await_readers(void) { - size_t n; - do { - n = mi_atomic_load_acquire(&abandoned_readers); - if (n != 0) mi_atomic_yield(); - } while (n != 0); -} - -// Pop from the abandoned list -static mi_segment_t* mi_abandoned_pop(void) { - mi_segment_t* segment; - // Check efficiently if it is empty (or if the visited list needs to be moved) - mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); - segment = mi_tagged_segment_ptr(ts); - if mi_likely(segment == NULL) { - if mi_likely(!mi_abandoned_visited_revisit()) { // try to swap in the visited list on NULL - return NULL; - } - } - - // Do a pop. We use a reader count to prevent - // a segment to be decommitted while a read is still pending, - // and a tagged pointer to prevent A-B-A link corruption. - // (this is called from `region.c:_mi_mem_free` for example) - mi_atomic_increment_relaxed(&abandoned_readers); // ensure no segment gets decommitted - mi_tagged_segment_t next = 0; - ts = mi_atomic_load_acquire(&abandoned); - do { - segment = mi_tagged_segment_ptr(ts); - if (segment != NULL) { - mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next); - next = mi_tagged_segment(anext, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted - } - } while (segment != NULL && !mi_atomic_cas_weak_acq_rel(&abandoned, &ts, next)); - mi_atomic_decrement_relaxed(&abandoned_readers); // release reader lock - if (segment != NULL) { - mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); - mi_atomic_decrement_relaxed(&abandoned_count); - } - return segment; -} - -/* ----------------------------------------------------------- - Abandon segment/page ------------------------------------------------------------ */ - -static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_assert_internal(segment->used == segment->abandoned); - mi_assert_internal(segment->used > 0); - mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); - mi_assert_internal(segment->abandoned_visits == 0); - mi_assert_expensive(mi_segment_is_valid(segment,tld)); - - // remove the free pages from the free page queues - mi_slice_t* slice = &segment->slices[0]; - const mi_slice_t* end = mi_segment_slices_end(segment); - while (slice < end) { - mi_assert_internal(slice->slice_count > 0); - mi_assert_internal(slice->slice_offset == 0); - if (slice->xblock_size == 0) { // a free page - mi_segment_span_remove_from_queue(slice,tld); - slice->xblock_size = 0; // but keep it free - } - slice = slice + slice->slice_count; - } - - // perform delayed decommits - mi_segment_delayed_decommit(segment, 
mi_option_is_enabled(mi_option_abandoned_page_decommit) /* force? */, tld->stats); - - // all pages in the segment are abandoned; add it to the abandoned list - _mi_stat_increase(&tld->stats->segments_abandoned, 1); - mi_segments_track_size(-((long)mi_segment_size(segment)), tld); - segment->thread_id = 0; - mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); - segment->abandoned_visits = 1; // from 0 to 1 to signify it is abandoned - mi_abandoned_push(segment); -} - -void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { - mi_assert(page != NULL); - mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); - mi_assert_internal(mi_page_heap(page) == NULL); - mi_segment_t* segment = _mi_page_segment(page); - - mi_assert_expensive(mi_segment_is_valid(segment,tld)); - segment->abandoned++; - - _mi_stat_increase(&tld->stats->pages_abandoned, 1); - mi_assert_internal(segment->abandoned <= segment->used); - if (segment->used == segment->abandoned) { - // all pages are abandoned, abandon the entire segment - mi_segment_abandon(segment, tld); - } -} - -/* ----------------------------------------------------------- - Reclaim abandoned pages ------------------------------------------------------------ */ - -static mi_slice_t* mi_slices_start_iterate(mi_segment_t* segment, const mi_slice_t** end) { - mi_slice_t* slice = &segment->slices[0]; - *end = mi_segment_slices_end(segment); - mi_assert_internal(slice->slice_count>0 && slice->xblock_size>0); // segment allocated page - slice = slice + slice->slice_count; // skip the first segment allocated page - return slice; -} - -// Possibly free pages and check if free space is available -static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, size_t block_size, mi_segments_tld_t* tld) -{ - mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); - mi_assert_internal(mi_segment_is_abandoned(segment)); - bool has_page = false; - - // for all slices - const mi_slice_t* end; - mi_slice_t* slice = mi_slices_start_iterate(segment, &end); - while (slice < end) { - mi_assert_internal(slice->slice_count > 0); - mi_assert_internal(slice->slice_offset == 0); - if (mi_slice_is_used(slice)) { // used page - // ensure used count is up to date and collect potential concurrent frees - mi_page_t* const page = mi_slice_to_page(slice); - _mi_page_free_collect(page, false); - if (mi_page_all_free(page)) { - // if this page is all free now, free it without adding to any queues (yet) - mi_assert_internal(page->next == NULL && page->prev==NULL); - _mi_stat_decrease(&tld->stats->pages_abandoned, 1); - segment->abandoned--; - slice = mi_segment_page_clear(page, tld); // re-assign slice due to coalesce! - mi_assert_internal(!mi_slice_is_used(slice)); - if (slice->slice_count >= slices_needed) { - has_page = true; - } - } - else { - if (page->xblock_size == block_size && mi_page_has_any_available(page)) { - // a page has available free blocks of the right size - has_page = true; - } - } - } - else { - // empty span - if (slice->slice_count >= slices_needed) { - has_page = true; - } - } - slice = slice + slice->slice_count; - } - return has_page; -} - -// Reclaim an abandoned segment; returns NULL if the segment was freed -// set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full. 
-static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) { - mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); - mi_assert_expensive(mi_segment_is_valid(segment, tld)); - if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; } - - segment->thread_id = _mi_thread_id(); - segment->abandoned_visits = 0; - mi_segments_track_size((long)mi_segment_size(segment), tld); - mi_assert_internal(segment->next == NULL); - _mi_stat_decrease(&tld->stats->segments_abandoned, 1); - - // for all slices - const mi_slice_t* end; - mi_slice_t* slice = mi_slices_start_iterate(segment, &end); - while (slice < end) { - mi_assert_internal(slice->slice_count > 0); - mi_assert_internal(slice->slice_offset == 0); - if (mi_slice_is_used(slice)) { - // in use: reclaim the page in our heap - mi_page_t* page = mi_slice_to_page(slice); - mi_assert_internal(!page->is_reset); - mi_assert_internal(page->is_committed); - mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); - mi_assert_internal(mi_page_heap(page) == NULL); - mi_assert_internal(page->next == NULL && page->prev==NULL); - _mi_stat_decrease(&tld->stats->pages_abandoned, 1); - segment->abandoned--; - // set the heap again and allow delayed free again - mi_page_set_heap(page, heap); - _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) - _mi_page_free_collect(page, false); // ensure used count is up to date - if (mi_page_all_free(page)) { - // if everything free by now, free the page - slice = mi_segment_page_clear(page, tld); // set slice again due to coalesceing - } - else { - // otherwise reclaim it into the heap - _mi_page_reclaim(heap, page); - if (requested_block_size == page->xblock_size && mi_page_has_any_available(page)) { - if (right_page_reclaimed != NULL) { *right_page_reclaimed = true; } - } - } - } - else { - // the span is free, add it to our page queues - slice = mi_segment_span_free_coalesce(slice, tld); // set slice again due to coalesceing - } - mi_assert_internal(slice->slice_count>0 && slice->slice_offset==0); - slice = slice + slice->slice_count; - } - - mi_assert(segment->abandoned == 0); - if (segment->used == 0) { // due to page_clear - mi_assert_internal(right_page_reclaimed == NULL || !(*right_page_reclaimed)); - mi_segment_free(segment, false, tld); - return NULL; - } - else { - return segment; - } -} - - -void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) { - mi_segment_t* segment; - while ((segment = mi_abandoned_pop()) != NULL) { - mi_segment_reclaim(segment, heap, 0, NULL, tld); - } -} - -static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slices, size_t block_size, bool* reclaimed, mi_segments_tld_t* tld) -{ - *reclaimed = false; - mi_segment_t* segment; - long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 8, 1024); // limit the work to bound allocation times - while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { - segment->abandoned_visits++; - // todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments - // and push them into the visited list and use many tries. Perhaps we can skip non-suitable ones in a better way? 
- bool is_suitable = _mi_heap_memid_is_suitable(heap, segment->memid); - bool has_page = mi_segment_check_free(segment,needed_slices,block_size,tld); // try to free up pages (due to concurrent frees) - if (segment->used == 0) { - // free the segment (by forced reclaim) to make it available to other threads. - // note1: we prefer to free a segment as that might lead to reclaiming another - // segment that is still partially used. - // note2: we could in principle optimize this by skipping reclaim and directly - // freeing but that would violate some invariants temporarily) - mi_segment_reclaim(segment, heap, 0, NULL, tld); - } - else if (has_page && is_suitable) { - // found a large enough free span, or a page of the right block_size with free space - // we return the result of reclaim (which is usually `segment`) as it might free - // the segment due to concurrent frees (in which case `NULL` is returned). - return mi_segment_reclaim(segment, heap, block_size, reclaimed, tld); - } - else if (segment->abandoned_visits > 3 && is_suitable) { - // always reclaim on 3rd visit to limit the abandoned queue length. - mi_segment_reclaim(segment, heap, 0, NULL, tld); - } - else { - // otherwise, push on the visited list so it gets not looked at too quickly again - mi_segment_delayed_decommit(segment, true /* force? */, tld->stats); // forced decommit if needed as we may not visit soon again - mi_abandoned_visited_push(segment); - } - } - return NULL; -} - - -void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) -{ - mi_segment_t* segment; - int max_tries = (force ? 16*1024 : 1024); // limit latency - if (force) { - mi_abandoned_visited_revisit(); - } - while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { - mi_segment_check_free(segment,0,0,tld); // try to free up pages (due to concurrent frees) - if (segment->used == 0) { - // free the segment (by forced reclaim) to make it available to other threads. - // note: we could in principle optimize this by skipping reclaim and directly - // freeing but that would violate some invariants temporarily) - mi_segment_reclaim(segment, heap, 0, NULL, tld); - } - else { - // otherwise, decommit if needed and push on the visited list - // note: forced decommit can be expensive if many threads are destroyed/created as in mstress. - mi_segment_delayed_decommit(segment, force, tld->stats); - mi_abandoned_visited_push(segment); - } - } -} - -/* ----------------------------------------------------------- - Reclaim or allocate ------------------------------------------------------------ */ - -static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_slices, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) -{ - mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); - mi_assert_internal(block_size <= MI_LARGE_OBJ_SIZE_MAX); - - // 1. try to reclaim an abandoned segment - bool reclaimed; - mi_segment_t* segment = mi_segment_try_reclaim(heap, needed_slices, block_size, &reclaimed, tld); - if (reclaimed) { - // reclaimed the right page right into the heap - mi_assert_internal(segment != NULL); - return NULL; // pretend out-of-memory as the page will be in the page queue of the heap with available blocks - } - else if (segment != NULL) { - // reclaimed a segment with a large enough empty span in it - return segment; - } - // 2. 
otherwise allocate a fresh segment - return mi_segment_alloc(0, 0, heap->arena_id, tld, os_tld, NULL); -} - - -/* ----------------------------------------------------------- - Page allocation ------------------------------------------------------------ */ - -static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_kind, size_t required, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) -{ - mi_assert_internal(required <= MI_LARGE_OBJ_SIZE_MAX && page_kind <= MI_PAGE_LARGE); - - // find a free page - size_t page_size = _mi_align_up(required, (required > MI_MEDIUM_PAGE_SIZE ? MI_MEDIUM_PAGE_SIZE : MI_SEGMENT_SLICE_SIZE)); - size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE; - mi_assert_internal(slices_needed * MI_SEGMENT_SLICE_SIZE == page_size); - mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed, heap->arena_id, tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld); - if (page==NULL) { - // no free page, allocate a new segment and try again - if (mi_segment_reclaim_or_alloc(heap, slices_needed, block_size, tld, os_tld) == NULL) { - // OOM or reclaimed a good page in the heap - return NULL; - } - else { - // otherwise try again - return mi_segments_page_alloc(heap, page_kind, required, block_size, tld, os_tld); - } - } - mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size); - mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id()); - mi_segment_delayed_decommit(_mi_ptr_segment(page), false, tld->stats); - return page; -} - - - -/* ----------------------------------------------------------- - Huge page allocation ------------------------------------------------------------ */ - -static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) -{ - mi_page_t* page = NULL; - mi_segment_t* segment = mi_segment_alloc(size,page_alignment,req_arena_id,tld,os_tld,&page); - if (segment == NULL || page==NULL) return NULL; - mi_assert_internal(segment->used==1); - mi_assert_internal(mi_page_block_size(page) >= size); - #if MI_HUGE_PAGE_ABANDON - segment->thread_id = 0; // huge segments are immediately abandoned - #endif - - // for huge pages we initialize the xblock_size as we may - // overallocate to accommodate large alignments. - size_t psize; - uint8_t* start = _mi_segment_page_start(segment, page, &psize); - page->xblock_size = (psize > MI_HUGE_BLOCK_SIZE ? 
MI_HUGE_BLOCK_SIZE : (uint32_t)psize); - - // decommit the part of the prefix of a page that will not be used; this can be quite large (close to MI_SEGMENT_SIZE) - if (page_alignment > 0 && segment->allow_decommit) { - uint8_t* aligned_p = (uint8_t*)_mi_align_up((uintptr_t)start, page_alignment); - mi_assert_internal(_mi_is_aligned(aligned_p, page_alignment)); - mi_assert_internal(psize - (aligned_p - start) >= size); - uint8_t* decommit_start = start + sizeof(mi_block_t); // for the free list - ptrdiff_t decommit_size = aligned_p - decommit_start; - _mi_os_decommit(decommit_start, decommit_size, &_mi_stats_main); // note: cannot use segment_decommit on huge segments - } - - return page; -} - -#if MI_HUGE_PAGE_ABANDON -// free huge block from another thread -void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { - // huge page segments are always abandoned and can be freed immediately by any thread - mi_assert_internal(segment->kind==MI_SEGMENT_HUGE); - mi_assert_internal(segment == _mi_page_segment(page)); - mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id)==0); - - // claim it and free - mi_heap_t* heap = mi_heap_get_default(); // issue #221; don't use the internal get_default_heap as we need to ensure the thread is initialized. - // paranoia: if this it the last reference, the cas should always succeed - size_t expected_tid = 0; - if (mi_atomic_cas_strong_acq_rel(&segment->thread_id, &expected_tid, heap->thread_id)) { - mi_block_set_next(page, block, page->free); - page->free = block; - page->used--; - page->is_zero = false; - mi_assert(page->used == 0); - mi_tld_t* tld = heap->tld; - _mi_segment_page_free(page, true, &tld->segments); - } -#if (MI_DEBUG!=0) - else { - mi_assert_internal(false); - } -#endif -} - -#else -// reset memory of a huge block from another thread -void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { - MI_UNUSED(page); - mi_assert_internal(segment->kind == MI_SEGMENT_HUGE); - mi_assert_internal(segment == _mi_page_segment(page)); - mi_assert_internal(page->used == 1); // this is called just before the free - mi_assert_internal(page->free == NULL); - if (segment->allow_decommit) { - const size_t csize = mi_usable_size(block) - sizeof(mi_block_t); - uint8_t* p = (uint8_t*)block + sizeof(mi_block_t); - _mi_os_decommit(p, csize, &_mi_stats_main); // note: cannot use segment_decommit on huge segments - } -} -#endif - -/* ----------------------------------------------------------- - Page allocation and free ------------------------------------------------------------ */ -mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_page_t* page; - if mi_unlikely(page_alignment > MI_ALIGNMENT_MAX) { - mi_assert_internal(_mi_is_power_of_two(page_alignment)); - mi_assert_internal(page_alignment >= MI_SEGMENT_SIZE); - if (page_alignment < MI_SEGMENT_SIZE) { page_alignment = MI_SEGMENT_SIZE; } - page = mi_segment_huge_page_alloc(block_size,page_alignment,heap->arena_id,tld,os_tld); - } - else if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { - page = mi_segments_page_alloc(heap,MI_PAGE_SMALL,block_size,block_size,tld,os_tld); - } - else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) { - page = mi_segments_page_alloc(heap,MI_PAGE_MEDIUM,MI_MEDIUM_PAGE_SIZE,block_size,tld, os_tld); - } - else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) { - page = mi_segments_page_alloc(heap,MI_PAGE_LARGE,block_size,block_size,tld, 
os_tld); - } - else { - page = mi_segment_huge_page_alloc(block_size,page_alignment,heap->arena_id,tld,os_tld); - } - mi_assert_internal(page == NULL || _mi_heap_memid_is_suitable(heap, _mi_page_segment(page)->memid)); - mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); - return page; -} - - diff --git a/3rd/mimalloc-2.0.9/src/stats.c b/3rd/mimalloc-2.0.9/src/stats.c deleted file mode 100644 index 2a8b9404..00000000 --- a/3rd/mimalloc-2.0.9/src/stats.c +++ /dev/null @@ -1,618 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" - -#include // fputs, stderr -#include // memset - -#if defined(_MSC_VER) && (_MSC_VER < 1920) -#pragma warning(disable:4204) // non-constant aggregate initializer -#endif - -/* ----------------------------------------------------------- - Statistics operations ------------------------------------------------------------ */ - -static bool mi_is_in_main(void* stat) { - return ((uint8_t*)stat >= (uint8_t*)&_mi_stats_main - && (uint8_t*)stat < ((uint8_t*)&_mi_stats_main + sizeof(mi_stats_t))); -} - -static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { - if (amount == 0) return; - if (mi_is_in_main(stat)) - { - // add atomically (for abandoned pages) - int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount); - mi_atomic_maxi64_relaxed(&stat->peak, current + amount); - if (amount > 0) { - mi_atomic_addi64_relaxed(&stat->allocated,amount); - } - else { - mi_atomic_addi64_relaxed(&stat->freed, -amount); - } - } - else { - // add thread local - stat->current += amount; - if (stat->current > stat->peak) stat->peak = stat->current; - if (amount > 0) { - stat->allocated += amount; - } - else { - stat->freed += -amount; - } - } -} - -void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { - if (mi_is_in_main(stat)) { - mi_atomic_addi64_relaxed( &stat->count, 1 ); - mi_atomic_addi64_relaxed( &stat->total, (int64_t)amount ); - } - else { - stat->count++; - stat->total += amount; - } -} - -void _mi_stat_increase(mi_stat_count_t* stat, size_t amount) { - mi_stat_update(stat, (int64_t)amount); -} - -void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) { - mi_stat_update(stat, -((int64_t)amount)); -} - -// must be thread safe as it is called from stats_merge -static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) { - if (stat==src) return; - if (src->allocated==0 && src->freed==0) return; - mi_atomic_addi64_relaxed( &stat->allocated, src->allocated * unit); - mi_atomic_addi64_relaxed( &stat->current, src->current * unit); - mi_atomic_addi64_relaxed( &stat->freed, src->freed * unit); - // peak scores do not work across threads.. 
- mi_atomic_addi64_relaxed( &stat->peak, src->peak * unit); -} - -static void mi_stat_counter_add(mi_stat_counter_t* stat, const mi_stat_counter_t* src, int64_t unit) { - if (stat==src) return; - mi_atomic_addi64_relaxed( &stat->total, src->total * unit); - mi_atomic_addi64_relaxed( &stat->count, src->count * unit); -} - -// must be thread safe as it is called from stats_merge -static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { - if (stats==src) return; - mi_stat_add(&stats->segments, &src->segments,1); - mi_stat_add(&stats->pages, &src->pages,1); - mi_stat_add(&stats->reserved, &src->reserved, 1); - mi_stat_add(&stats->committed, &src->committed, 1); - mi_stat_add(&stats->reset, &src->reset, 1); - mi_stat_add(&stats->page_committed, &src->page_committed, 1); - - mi_stat_add(&stats->pages_abandoned, &src->pages_abandoned, 1); - mi_stat_add(&stats->segments_abandoned, &src->segments_abandoned, 1); - mi_stat_add(&stats->threads, &src->threads, 1); - - mi_stat_add(&stats->malloc, &src->malloc, 1); - mi_stat_add(&stats->segments_cache, &src->segments_cache, 1); - mi_stat_add(&stats->normal, &src->normal, 1); - mi_stat_add(&stats->huge, &src->huge, 1); - mi_stat_add(&stats->large, &src->large, 1); - - mi_stat_counter_add(&stats->pages_extended, &src->pages_extended, 1); - mi_stat_counter_add(&stats->mmap_calls, &src->mmap_calls, 1); - mi_stat_counter_add(&stats->commit_calls, &src->commit_calls, 1); - - mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1); - mi_stat_counter_add(&stats->searches, &src->searches, 1); - mi_stat_counter_add(&stats->normal_count, &src->normal_count, 1); - mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1); - mi_stat_counter_add(&stats->large_count, &src->large_count, 1); -#if MI_STAT>1 - for (size_t i = 0; i <= MI_BIN_HUGE; i++) { - if (src->normal_bins[i].allocated > 0 || src->normal_bins[i].freed > 0) { - mi_stat_add(&stats->normal_bins[i], &src->normal_bins[i], 1); - } - } -#endif -} - -/* ----------------------------------------------------------- - Display statistics ------------------------------------------------------------ */ - -// unit > 0 : size in binary bytes -// unit == 0: count as decimal -// unit < 0 : count in binary -static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg, const char* fmt) { - char buf[32]; buf[0] = 0; - int len = 32; - const char* suffix = (unit <= 0 ? " " : "B"); - const int64_t base = (unit == 0 ? 1000 : 1024); - if (unit>0) n *= unit; - - const int64_t pos = (n < 0 ? -n : n); - if (pos < base) { - if (n!=1 || suffix[0] != 'B') { // skip printing 1 B for the unit column - snprintf(buf, len, "%d %-3s", (int)n, (n==0 ? "" : suffix)); - } - } - else { - int64_t divider = base; - const char* magnitude = "K"; - if (pos >= divider*base) { divider *= base; magnitude = "M"; } - if (pos >= divider*base) { divider *= base; magnitude = "G"; } - const int64_t tens = (n / (divider/10)); - const long whole = (long)(tens/10); - const long frac1 = (long)(tens%10); - char unitdesc[8]; - snprintf(unitdesc, 8, "%s%s%s", magnitude, (base==1024 ? "i" : ""), suffix); - snprintf(buf, len, "%ld.%ld %-3s", whole, (frac1 < 0 ? -frac1 : frac1), unitdesc); - } - _mi_fprintf(out, arg, (fmt==NULL ? 
"%11s" : fmt), buf); -} - - -static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg) { - mi_printf_amount(n,unit,out,arg,NULL); -} - -static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out, void* arg) { - if (unit==1) _mi_fprintf(out, arg, "%11s"," "); - else mi_print_amount(n,0,out,arg); -} - -static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg, const char* notok ) { - _mi_fprintf(out, arg,"%10s:", msg); - if (unit > 0) { - mi_print_amount(stat->peak, unit, out, arg); - mi_print_amount(stat->allocated, unit, out, arg); - mi_print_amount(stat->freed, unit, out, arg); - mi_print_amount(stat->current, unit, out, arg); - mi_print_amount(unit, 1, out, arg); - mi_print_count(stat->allocated, unit, out, arg); - if (stat->allocated > stat->freed) { - _mi_fprintf(out, arg, " "); - _mi_fprintf(out, arg, (notok == NULL ? "not all freed!" : notok)); - _mi_fprintf(out, arg, "\n"); - } - else { - _mi_fprintf(out, arg, " ok\n"); - } - } - else if (unit<0) { - mi_print_amount(stat->peak, -1, out, arg); - mi_print_amount(stat->allocated, -1, out, arg); - mi_print_amount(stat->freed, -1, out, arg); - mi_print_amount(stat->current, -1, out, arg); - if (unit==-1) { - _mi_fprintf(out, arg, "%22s", ""); - } - else { - mi_print_amount(-unit, 1, out, arg); - mi_print_count((stat->allocated / -unit), 0, out, arg); - } - if (stat->allocated > stat->freed) - _mi_fprintf(out, arg, " not all freed!\n"); - else - _mi_fprintf(out, arg, " ok\n"); - } - else { - mi_print_amount(stat->peak, 1, out, arg); - mi_print_amount(stat->allocated, 1, out, arg); - _mi_fprintf(out, arg, "%11s", " "); // no freed - mi_print_amount(stat->current, 1, out, arg); - _mi_fprintf(out, arg, "\n"); - } -} - -static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) { - mi_stat_print_ex(stat, msg, unit, out, arg, NULL); -} - -static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) { - _mi_fprintf(out, arg, "%10s:", msg); - mi_print_amount(stat->total, -1, out, arg); - _mi_fprintf(out, arg, "\n"); -} - -static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg) { - const int64_t avg_tens = (stat->count == 0 ? 0 : (stat->total*10 / stat->count)); - const long avg_whole = (long)(avg_tens/10); - const long avg_frac1 = (long)(avg_tens%10); - _mi_fprintf(out, arg, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1); -} - - -static void mi_print_header(mi_output_fun* out, void* arg ) { - _mi_fprintf(out, arg, "%10s: %10s %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "current ", "unit ", "count "); -} - -#if MI_STAT>1 -static void mi_stats_print_bins(const mi_stat_count_t* bins, size_t max, const char* fmt, mi_output_fun* out, void* arg) { - bool found = false; - char buf[64]; - for (size_t i = 0; i <= max; i++) { - if (bins[i].allocated > 0) { - found = true; - int64_t unit = _mi_bin_size((uint8_t)i); - snprintf(buf, 64, "%s %3lu", fmt, (long)i); - mi_stat_print(&bins[i], buf, unit, out, arg); - } - } - if (found) { - _mi_fprintf(out, arg, "\n"); - mi_print_header(out, arg); - } -} -#endif - - - -//------------------------------------------------------------ -// Use an output wrapper for line-buffered output -// (which is nice when using loggers etc.) 
-//------------------------------------------------------------ -typedef struct buffered_s { - mi_output_fun* out; // original output function - void* arg; // and state - char* buf; // local buffer of at least size `count+1` - size_t used; // currently used chars `used <= count` - size_t count; // total chars available for output -} buffered_t; - -static void mi_buffered_flush(buffered_t* buf) { - buf->buf[buf->used] = 0; - _mi_fputs(buf->out, buf->arg, NULL, buf->buf); - buf->used = 0; -} - -static void mi_cdecl mi_buffered_out(const char* msg, void* arg) { - buffered_t* buf = (buffered_t*)arg; - if (msg==NULL || buf==NULL) return; - for (const char* src = msg; *src != 0; src++) { - char c = *src; - if (buf->used >= buf->count) mi_buffered_flush(buf); - mi_assert_internal(buf->used < buf->count); - buf->buf[buf->used++] = c; - if (c == '\n') mi_buffered_flush(buf); - } -} - -//------------------------------------------------------------ -// Print statistics -//------------------------------------------------------------ - -static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults); - -static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_attr_noexcept { - // wrap the output function to be line buffered - char buf[256]; - buffered_t buffer = { out0, arg0, NULL, 0, 255 }; - buffer.buf = buf; - mi_output_fun* out = &mi_buffered_out; - void* arg = &buffer; - - // and print using that - mi_print_header(out,arg); - #if MI_STAT>1 - mi_stats_print_bins(stats->normal_bins, MI_BIN_HUGE, "normal",out,arg); - #endif - #if MI_STAT - mi_stat_print(&stats->normal, "normal", (stats->normal_count.count == 0 ? 1 : -(stats->normal.allocated / stats->normal_count.count)), out, arg); - mi_stat_print(&stats->large, "large", (stats->large_count.count == 0 ? 1 : -(stats->large.allocated / stats->large_count.count)), out, arg); - mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 
1 : -(stats->huge.allocated / stats->huge_count.count)), out, arg); - mi_stat_count_t total = { 0,0,0,0 }; - mi_stat_add(&total, &stats->normal, 1); - mi_stat_add(&total, &stats->large, 1); - mi_stat_add(&total, &stats->huge, 1); - mi_stat_print(&total, "total", 1, out, arg); - #endif - #if MI_STAT>1 - mi_stat_print(&stats->malloc, "malloc req", 1, out, arg); - _mi_fprintf(out, arg, "\n"); - #endif - mi_stat_print_ex(&stats->reserved, "reserved", 1, out, arg, ""); - mi_stat_print_ex(&stats->committed, "committed", 1, out, arg, ""); - mi_stat_print(&stats->reset, "reset", 1, out, arg); - mi_stat_print(&stats->page_committed, "touched", 1, out, arg); - mi_stat_print(&stats->segments, "segments", -1, out, arg); - mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out, arg); - mi_stat_print(&stats->segments_cache, "-cached", -1, out, arg); - mi_stat_print(&stats->pages, "pages", -1, out, arg); - mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out, arg); - mi_stat_counter_print(&stats->pages_extended, "-extended", out, arg); - mi_stat_counter_print(&stats->page_no_retire, "-noretire", out, arg); - mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg); - mi_stat_counter_print(&stats->commit_calls, "commits", out, arg); - mi_stat_print(&stats->threads, "threads", -1, out, arg); - mi_stat_counter_print_avg(&stats->searches, "searches", out, arg); - _mi_fprintf(out, arg, "%10s: %7zu\n", "numa nodes", _mi_os_numa_node_count()); - - mi_msecs_t elapsed; - mi_msecs_t user_time; - mi_msecs_t sys_time; - size_t current_rss; - size_t peak_rss; - size_t current_commit; - size_t peak_commit; - size_t page_faults; - mi_stat_process_info(&elapsed, &user_time, &sys_time, ¤t_rss, &peak_rss, ¤t_commit, &peak_commit, &page_faults); - _mi_fprintf(out, arg, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); - _mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, rss: ", "process", - user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults ); - mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s"); - if (peak_commit > 0) { - _mi_fprintf(out, arg, ", commit: "); - mi_printf_amount((int64_t)peak_commit, 1, out, arg, "%s"); - } - _mi_fprintf(out, arg, "\n"); -} - -static mi_msecs_t mi_process_start; // = 0 - -static mi_stats_t* mi_stats_get_default(void) { - mi_heap_t* heap = mi_heap_get_default(); - return &heap->tld->stats; -} - -static void mi_stats_merge_from(mi_stats_t* stats) { - if (stats != &_mi_stats_main) { - mi_stats_add(&_mi_stats_main, stats); - memset(stats, 0, sizeof(mi_stats_t)); - } -} - -void mi_stats_reset(void) mi_attr_noexcept { - mi_stats_t* stats = mi_stats_get_default(); - if (stats != &_mi_stats_main) { memset(stats, 0, sizeof(mi_stats_t)); } - memset(&_mi_stats_main, 0, sizeof(mi_stats_t)); - if (mi_process_start == 0) { mi_process_start = _mi_clock_start(); }; -} - -void mi_stats_merge(void) mi_attr_noexcept { - mi_stats_merge_from( mi_stats_get_default() ); -} - -void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` - mi_stats_merge_from(stats); -} - -void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { - mi_stats_merge_from(mi_stats_get_default()); - _mi_stats_print(&_mi_stats_main, out, arg); -} - -void mi_stats_print(void* out) mi_attr_noexcept { - // for compatibility there is an `out` parameter (which can be `stdout` or `stderr`) - mi_stats_print_out((mi_output_fun*)out, NULL); -} - -void mi_thread_stats_print_out(mi_output_fun* out, void* arg) 
mi_attr_noexcept { - _mi_stats_print(mi_stats_get_default(), out, arg); -} - - -// ---------------------------------------------------------------- -// Basic timer for convenience; use milli-seconds to avoid doubles -// ---------------------------------------------------------------- -#ifdef _WIN32 -#include -static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) { - static LARGE_INTEGER mfreq; // = 0 - if (mfreq.QuadPart == 0LL) { - LARGE_INTEGER f; - QueryPerformanceFrequency(&f); - mfreq.QuadPart = f.QuadPart/1000LL; - if (mfreq.QuadPart == 0) mfreq.QuadPart = 1; - } - return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart); -} - -mi_msecs_t _mi_clock_now(void) { - LARGE_INTEGER t; - QueryPerformanceCounter(&t); - return mi_to_msecs(t); -} -#else -#include -#if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC) -mi_msecs_t _mi_clock_now(void) { - struct timespec t; - #ifdef CLOCK_MONOTONIC - clock_gettime(CLOCK_MONOTONIC, &t); - #else - clock_gettime(CLOCK_REALTIME, &t); - #endif - return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); -} -#else -// low resolution timer -mi_msecs_t _mi_clock_now(void) { - return ((mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000)); -} -#endif -#endif - - -static mi_msecs_t mi_clock_diff; - -mi_msecs_t _mi_clock_start(void) { - if (mi_clock_diff == 0.0) { - mi_msecs_t t0 = _mi_clock_now(); - mi_clock_diff = _mi_clock_now() - t0; - } - return _mi_clock_now(); -} - -mi_msecs_t _mi_clock_end(mi_msecs_t start) { - mi_msecs_t end = _mi_clock_now(); - return (end - start - mi_clock_diff); -} - - -// -------------------------------------------------------- -// Basic process statistics -// -------------------------------------------------------- - -#if defined(_WIN32) -#include - -static mi_msecs_t filetime_msecs(const FILETIME* ftime) { - ULARGE_INTEGER i; - i.LowPart = ftime->dwLowDateTime; - i.HighPart = ftime->dwHighDateTime; - mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds - return msecs; -} - -typedef struct _PROCESS_MEMORY_COUNTERS { - DWORD cb; - DWORD PageFaultCount; - SIZE_T PeakWorkingSetSize; - SIZE_T WorkingSetSize; - SIZE_T QuotaPeakPagedPoolUsage; - SIZE_T QuotaPagedPoolUsage; - SIZE_T QuotaPeakNonPagedPoolUsage; - SIZE_T QuotaNonPagedPoolUsage; - SIZE_T PagefileUsage; - SIZE_T PeakPagefileUsage; -} PROCESS_MEMORY_COUNTERS; -typedef PROCESS_MEMORY_COUNTERS* PPROCESS_MEMORY_COUNTERS; -typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD); -static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL; - -static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) -{ - *elapsed = _mi_clock_end(mi_process_start); - FILETIME ct; - FILETIME ut; - FILETIME st; - FILETIME et; - GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); - *utime = filetime_msecs(&ut); - *stime = filetime_msecs(&st); - - // load psapi on demand - if (pGetProcessMemoryInfo == NULL) { - HINSTANCE hDll = LoadLibrary(TEXT("psapi.dll")); - if (hDll != NULL) { - pGetProcessMemoryInfo = (PGetProcessMemoryInfo)(void (*)(void))GetProcAddress(hDll, "GetProcessMemoryInfo"); - } - } - - // get process info - PROCESS_MEMORY_COUNTERS info; - memset(&info, 0, sizeof(info)); - if (pGetProcessMemoryInfo != NULL) { - pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); - } - *current_rss = (size_t)info.WorkingSetSize; - *peak_rss = (size_t)info.PeakWorkingSetSize; - 
*current_commit = (size_t)info.PagefileUsage; - *peak_commit = (size_t)info.PeakPagefileUsage; - *page_faults = (size_t)info.PageFaultCount; -} - -#elif !defined(__wasi__) && (defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__)) -#include -#include -#include - -#if defined(__APPLE__) -#include -#endif - -#if defined(__HAIKU__) -#include -#endif - -static mi_msecs_t timeval_secs(const struct timeval* tv) { - return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); -} - -static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) -{ - *elapsed = _mi_clock_end(mi_process_start); - struct rusage rusage; - getrusage(RUSAGE_SELF, &rusage); - *utime = timeval_secs(&rusage.ru_utime); - *stime = timeval_secs(&rusage.ru_stime); -#if !defined(__HAIKU__) - *page_faults = rusage.ru_majflt; -#endif - // estimate commit using our stats - *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); - *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - *current_rss = *current_commit; // estimate -#if defined(__HAIKU__) - // Haiku does not have (yet?) a way to - // get these stats per process - thread_info tid; - area_info mem; - ssize_t c; - get_thread_info(find_thread(0), &tid); - while (get_next_area_info(tid.team, &c, &mem) == B_OK) { - *peak_rss += mem.ram_size; - } - *page_faults = 0; -#elif defined(__APPLE__) - *peak_rss = rusage.ru_maxrss; // BSD reports in bytes - struct mach_task_basic_info info; - mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; - if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { - *current_rss = (size_t)info.resident_size; - } -#else - *peak_rss = rusage.ru_maxrss * 1024; // Linux reports in KiB -#endif -} - -#else -#ifndef __wasi__ -// WebAssembly instances are not processes -#pragma message("define a way to get process info") -#endif - -static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) -{ - *elapsed = _mi_clock_end(mi_process_start); - *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); - *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - *peak_rss = *peak_commit; - *current_rss = *current_commit; - *page_faults = 0; - *utime = 0; - *stime = 0; -} -#endif - - -mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept -{ - mi_msecs_t elapsed = 0; - mi_msecs_t utime = 0; - mi_msecs_t stime = 0; - size_t current_rss0 = 0; - size_t peak_rss0 = 0; - size_t current_commit0 = 0; - size_t peak_commit0 = 0; - size_t page_faults0 = 0; - mi_stat_process_info(&elapsed,&utime, &stime, ¤t_rss0, &peak_rss0, ¤t_commit0, &peak_commit0, &page_faults0); - if (elapsed_msecs!=NULL) *elapsed_msecs = (elapsed < 0 ? 0 : (elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)elapsed : PTRDIFF_MAX)); - if (user_msecs!=NULL) *user_msecs = (utime < 0 ? 0 : (utime < (mi_msecs_t)PTRDIFF_MAX ? 
(size_t)utime : PTRDIFF_MAX)); - if (system_msecs!=NULL) *system_msecs = (stime < 0 ? 0 : (stime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)stime : PTRDIFF_MAX)); - if (current_rss!=NULL) *current_rss = current_rss0; - if (peak_rss!=NULL) *peak_rss = peak_rss0; - if (current_commit!=NULL) *current_commit = current_commit0; - if (peak_commit!=NULL) *peak_commit = peak_commit0; - if (page_faults!=NULL) *page_faults = page_faults0; -} diff --git a/3rd/mimalloc-2.0.9/test/main-override.cpp b/3rd/mimalloc-2.0.9/test/main-override.cpp deleted file mode 100644 index 7242eb29..00000000 --- a/3rd/mimalloc-2.0.9/test/main-override.cpp +++ /dev/null @@ -1,343 +0,0 @@ -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - -#ifdef _WIN32 -#include -#endif - -#ifdef _WIN32 -#include -static void msleep(unsigned long msecs) { Sleep(msecs); } -#else -#include -static void msleep(unsigned long msecs) { usleep(msecs * 1000UL); } -#endif - -static void heap_thread_free_large(); // issue #221 -static void heap_no_delete(); // issue #202 -static void heap_late_free(); // issue #204 -static void padding_shrink(); // issue #209 -static void various_tests(); -static void test_mt_shutdown(); -static void large_alloc(void); // issue #363 -static void fail_aslr(); // issue #372 -static void tsan_numa_test(); // issue #414 -static void strdup_test(); // issue #445 -static void bench_alloc_large(void); // issue #xxx -static void heap_thread_free_huge(); - -static void test_stl_allocators(); - - -int main() { - mi_stats_reset(); // ignore earlier allocations - heap_thread_free_huge(); - /* - heap_thread_free_large(); - heap_no_delete(); - heap_late_free(); - padding_shrink(); - various_tests(); - large_alloc(); - tsan_numa_test(); - strdup_test(); - */ - test_stl_allocators(); - test_mt_shutdown(); - - //fail_aslr(); - bench_alloc_large(); - mi_stats_print(NULL); - return 0; -} - -static void* p = malloc(8); - -void free_p() { - free(p); - return; -} - -class Test { -private: - int i; -public: - Test(int x) { i = x; } - ~Test() { } -}; - - -static void various_tests() { - atexit(free_p); - void* p1 = malloc(78); - void* p2 = mi_malloc_aligned(24, 16); - free(p1); - p1 = malloc(8); - char* s = mi_strdup("hello\n"); - - mi_free(p2); - p2 = malloc(16); - p1 = realloc(p1, 32); - free(p1); - free(p2); - mi_free(s); - - Test* t = new Test(42); - delete t; - t = new (std::nothrow) Test(42); - delete t; -} - -class Static { -private: - void* p; -public: - Static() { - p = malloc(64); - return; - } - ~Static() { - free(p); - return; - } -}; - -static Static s = Static(); - - -static bool test_stl_allocator1() { - std::vector > vec; - vec.push_back(1); - vec.pop_back(); - return vec.size() == 0; -} - -struct some_struct { int i; int j; double z; }; - -static bool test_stl_allocator2() { - std::vector > vec; - vec.push_back(some_struct()); - vec.pop_back(); - return vec.size() == 0; -} - -static bool test_stl_allocator3() { - std::vector > vec; - vec.push_back(1); - vec.pop_back(); - return vec.size() == 0; -} - -static bool test_stl_allocator4() { - std::vector > vec; - vec.push_back(some_struct()); - vec.pop_back(); - return vec.size() == 0; -} - -static bool test_stl_allocator5() { - std::vector > vec; - vec.push_back(1); - vec.pop_back(); - return vec.size() == 0; -} - -static bool test_stl_allocator6() { - std::vector > vec; - vec.push_back(some_struct()); - vec.pop_back(); - return vec.size() == 0; -} - -static void test_stl_allocators() { - 
test_stl_allocator1(); - test_stl_allocator2(); - test_stl_allocator3(); - test_stl_allocator4(); - test_stl_allocator5(); - test_stl_allocator6(); -} - -// issue 445 -static void strdup_test() { -#ifdef _MSC_VER - char* s = _strdup("hello\n"); - char* buf = NULL; - size_t len; - _dupenv_s(&buf, &len, "MIMALLOC_VERBOSE"); - mi_free(buf); - mi_free(s); -#endif -} - -// Issue #202 -static void heap_no_delete_worker() { - mi_heap_t* heap = mi_heap_new(); - void* q = mi_heap_malloc(heap, 1024); (void)(q); - // mi_heap_delete(heap); // uncomment to prevent assertion -} - -static void heap_no_delete() { - auto t1 = std::thread(heap_no_delete_worker); - t1.join(); -} - - -// Issue #204 -static volatile void* global_p; - -static void t1main() { - mi_heap_t* heap = mi_heap_new(); - global_p = mi_heap_malloc(heap, 1024); - mi_heap_delete(heap); -} - -static void heap_late_free() { - auto t1 = std::thread(t1main); - - msleep(2000); - assert(global_p); - mi_free((void*)global_p); - - t1.join(); -} - -// issue #209 -static void* shared_p; -static void alloc0(/* void* arg */) -{ - shared_p = mi_malloc(8); -} - -static void padding_shrink(void) -{ - auto t1 = std::thread(alloc0); - t1.join(); - mi_free(shared_p); -} - - -// Issue #221 -static void heap_thread_free_large_worker() { - mi_free(shared_p); -} - -static void heap_thread_free_large() { - for (int i = 0; i < 100; i++) { - shared_p = mi_malloc_aligned(2 * 1024 * 1024 + 1, 8); - auto t1 = std::thread(heap_thread_free_large_worker); - t1.join(); - } -} - -static void heap_thread_free_huge_worker() { - mi_free(shared_p); -} - -static void heap_thread_free_huge() { - for (int i = 0; i < 100; i++) { - shared_p = mi_malloc(1024 * 1024 * 1024); - auto t1 = std::thread(heap_thread_free_huge_worker); - t1.join(); - } -} - -static void test_mt_shutdown() -{ - const int threads = 5; - std::vector< std::future< std::vector< char* > > > ts; - - auto fn = [&]() - { - std::vector< char* > ps; - ps.reserve(1000); - for (int i = 0; i < 1000; i++) - ps.emplace_back(new char[1]); - return ps; - }; - - for (int i = 0; i < threads; i++) - ts.emplace_back(std::async(std::launch::async, fn)); - - for (auto& f : ts) - for (auto& p : f.get()) - delete[] p; - - std::cout << "done" << std::endl; -} - -// issue #363 -using namespace std; - -void large_alloc(void) -{ - char* a = new char[1ull << 25]; - thread th([&] { - delete[] a; - }); - th.join(); -} - -// issue #372 -static void fail_aslr() { - size_t sz = (4ULL << 40); // 4TiB - void* p = malloc(sz); - printf("pointer p: %p: area up to %p\n", p, (uint8_t*)p + sz); - *(int*)0x5FFFFFFF000 = 0; // should segfault -} - -// issues #414 -static void dummy_worker() { - void* p = mi_malloc(0); - mi_free(p); -} - -static void tsan_numa_test() { - auto t1 = std::thread(dummy_worker); - dummy_worker(); - t1.join(); -} - -// issue #? 
-#include -#include -#include - -static void bench_alloc_large(void) { - static constexpr int kNumBuffers = 20; - static constexpr size_t kMinBufferSize = 5 * 1024 * 1024; - static constexpr size_t kMaxBufferSize = 25 * 1024 * 1024; - std::unique_ptr buffers[kNumBuffers]; - - std::random_device rd; - std::mt19937 gen(42); //rd()); - std::uniform_int_distribution<> size_distribution(kMinBufferSize, kMaxBufferSize); - std::uniform_int_distribution<> buf_number_distribution(0, kNumBuffers - 1); - - static constexpr int kNumIterations = 2000; - const auto start = std::chrono::steady_clock::now(); - for (int i = 0; i < kNumIterations; ++i) { - int buffer_idx = buf_number_distribution(gen); - size_t new_size = size_distribution(gen); - buffers[buffer_idx] = std::make_unique(new_size); - } - const auto end = std::chrono::steady_clock::now(); - const auto num_ms = std::chrono::duration_cast(end - start).count(); - const auto us_per_allocation = std::chrono::duration_cast(end - start).count() / kNumIterations; - std::cout << kNumIterations << " allocations Done in " << num_ms << "ms." << std::endl; - std::cout << "Avg " << us_per_allocation << " us per allocation" << std::endl; -} - diff --git a/3rd/mimalloc-2.0.9/.gitattributes b/3rd/mimalloc-3.2.6/.gitattributes similarity index 100% rename from 3rd/mimalloc-2.0.9/.gitattributes rename to 3rd/mimalloc-3.2.6/.gitattributes diff --git a/3rd/mimalloc-2.0.9/.gitignore b/3rd/mimalloc-3.2.6/.gitignore similarity index 74% rename from 3rd/mimalloc-2.0.9/.gitignore rename to 3rd/mimalloc-3.2.6/.gitignore index f8b7f5eb..bcedbc3f 100644 --- a/3rd/mimalloc-2.0.9/.gitignore +++ b/3rd/mimalloc-3.2.6/.gitignore @@ -1,9 +1,13 @@ +build ide/vs20??/*.db ide/vs20??/*.opendb ide/vs20??/*.user -ide/vs20??/*.vcxproj.filters ide/vs20??/.vs ide/vs20??/VTune* out/ docs/ *.zip +*.tar +*.gz +.vscode +.DS_STore diff --git a/3rd/mimalloc-3.2.6/CMakeLists.txt b/3rd/mimalloc-3.2.6/CMakeLists.txt new file mode 100644 index 00000000..7b8dcdc3 --- /dev/null +++ b/3rd/mimalloc-3.2.6/CMakeLists.txt @@ -0,0 +1,802 @@ +cmake_minimum_required(VERSION 3.18) +project(libmimalloc C) + +set(CMAKE_C_STANDARD 11) +set(CMAKE_CXX_STANDARD 17) + +option(MI_SECURE "Use full security mitigations (like guard pages, allocation randomization, double-free mitigation, and free-list corruption detection)" OFF) +option(MI_DEBUG_FULL "Use full internal heap invariant checking in DEBUG mode (expensive)" OFF) +option(MI_PADDING "Enable padding to detect heap block overflow (always on in DEBUG or SECURE mode, or with Valgrind/ASAN)" OFF) +option(MI_OVERRIDE "Override the standard malloc interface (i.e. 
define entry points for 'malloc', 'free', etc)" ON) +option(MI_XMALLOC "Enable abort() call on memory allocation failure by default" OFF) +option(MI_SHOW_ERRORS "Show error and warning messages by default (only enabled by default in DEBUG mode)" OFF) +option(MI_GUARDED "Build with guard pages behind certain object allocations (implies MI_NO_PADDING=ON)" OFF) +option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF) +option(MI_OPT_ARCH "Only for optimized builds: turn on architecture specific optimizations (for x64: '-march=haswell;-mavx2' (2013), for arm64: '-march=armv8.1-a' (2016))" OFF) +option(MI_OPT_SIMD "Use SIMD instructions (requires MI_OPT_ARCH to be enabled)" OFF) +option(MI_SEE_ASM "Generate assembly files" OFF) +option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON) +option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON) +option(MI_WIN_REDIRECT "Use redirection module ('mimalloc-redirect') on Windows if compiling mimalloc as a DLL" ON) +option(MI_WIN_USE_FIXED_TLS "Use a fixed TLS slot on Windows to avoid extra tests in the malloc fast path" OFF) +option(MI_LOCAL_DYNAMIC_TLS "Use local-dynamic-tls, a slightly slower but dlopen-compatible thread local storage mechanism (Unix)" OFF) +option(MI_LIBC_MUSL "Enable this when linking with musl libc" OFF) + +option(MI_DEBUG_TSAN "Build with thread sanitizer (needs clang)" OFF) +option(MI_DEBUG_UBSAN "Build with undefined-behavior sanitizer (needs clang++)" OFF) +option(MI_TRACK_VALGRIND "Compile with Valgrind support (adds a small overhead)" OFF) +option(MI_TRACK_ASAN "Compile with address sanitizer support (adds a small overhead)" OFF) +option(MI_TRACK_ETW "Compile with Windows event tracing (ETW) support (adds a small overhead)" OFF) + +option(MI_BUILD_SHARED "Build shared library" ON) +option(MI_BUILD_STATIC "Build static library" ON) +option(MI_BUILD_OBJECT "Build object library" ON) +option(MI_BUILD_TESTS "Build test executables" ON) + +option(MI_SKIP_COLLECT_ON_EXIT "Skip collecting memory on program exit" OFF) +option(MI_NO_PADDING "Force no use of padding even in DEBUG mode etc." 
OFF) +option(MI_INSTALL_TOPLEVEL "Install directly into $CMAKE_INSTALL_PREFIX instead of PREFIX/lib/mimalloc-version" OFF) +option(MI_NO_THP "Disable transparent huge pages support on Linux/Android for the mimalloc process only" OFF) +option(MI_EXTRA_CPPDEFS "Extra pre-processor definitions (use as `-DMI_EXTRA_CPPDEFS=\"opt1=val1;opt2=val2\"`)" "") + +# negated options for vcpkg features +option(MI_NO_USE_CXX "Use plain C compilation (has priority over MI_USE_CXX)" OFF) +option(MI_NO_OPT_ARCH "Do not use architecture specific optimizations (like '-march=armv8.1-a' for example) (has priority over MI_OPT_ARCH)" OFF) + +# deprecated options +option(MI_WIN_USE_FLS "Use Fiber local storage on Windows to detect thread termination (deprecated)" OFF) +option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF) +option(MI_USE_LIBATOMIC "Explicitly link with -latomic (on older systems) (deprecated and detected automatically)" OFF) + +include(CheckLinkerFlag) # requires cmake 3.18 +include(CheckIncludeFiles) +include(GNUInstallDirs) +include("cmake/mimalloc-config-version.cmake") + +set(mi_sources + src/alloc.c + src/alloc-aligned.c + src/alloc-posix.c + src/arena.c + src/arena-meta.c + src/bitmap.c + src/heap.c + src/init.c + src/libc.c + src/options.c + src/os.c + src/page.c + src/page-map.c + src/random.c + src/stats.c + src/theap.c + src/threadlocal.c + src/prim/prim.c) + +set(mi_cflags "") +set(mi_cflags_static "") # extra flags for a static library build +set(mi_cflags_dynamic "") # extra flags for a shared-object library build +set(mi_libraries "") + +if(MI_EXTRA_CPPDEFS) + set(mi_defines ${MI_EXTRA_CPPDEFS}) +else() + set(mi_defines "") +endif() + +# pass git revision as a define +if(EXISTS "${CMAKE_SOURCE_DIR}/.git/index") + find_package(Git) + if(GIT_FOUND) + execute_process(COMMAND ${GIT_EXECUTABLE} "describe" OUTPUT_VARIABLE mi_git_describe RESULT_VARIABLE mi_git_res ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + if(mi_git_res EQUAL "0") + list(APPEND mi_defines "MI_GIT_DESCRIBE=${mi_git_describe}") + # add to dependencies so we rebuild if the git head commit changes + set_property(GLOBAL APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${CMAKE_SOURCE_DIR}/.git/index") + endif() + endif() +endif() + +# ----------------------------------------------------------------------------- +# Convenience: set default build type and compiler depending on the build directory +# ----------------------------------------------------------------------------- + +message(STATUS "") +if (NOT CMAKE_BUILD_TYPE) + if ("${CMAKE_BINARY_DIR}" MATCHES ".*((D|d)ebug|asan|tsan|ubsan|valgrind)$" OR MI_DEBUG_FULL) + message(STATUS "No build type selected, default to 'Debug'") + set(CMAKE_BUILD_TYPE "Debug") + else() + message(STATUS "No build type selected, default to 'Release'") + set(CMAKE_BUILD_TYPE "Release") + endif() +endif() + +if (CMAKE_GENERATOR MATCHES "^Visual Studio.*$") + message(STATUS "Note: when building with Visual Studio the build type is specified when building.") + message(STATUS "For example: 'cmake --build . 
--config=Release") +endif() + +if("${CMAKE_BINARY_DIR}" MATCHES ".*(S|s)ecure$") + message(STATUS "Default to secure build") + set(MI_SECURE "ON") +endif() + + +# Determine architecture +set(MI_OPT_ARCH_FLAGS "") +set(MI_ARCH "unknown") +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86|i[3456]86)$" OR CMAKE_GENERATOR_PLATFORM MATCHES "^(x86|Win32)$") + set(MI_ARCH "x86") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|x64|amd64|AMD64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "x64" OR "x86_64" IN_LIST CMAKE_OSX_ARCHITECTURES) # must be before arm64 + set(MI_ARCH "x64") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv[89].?|ARM64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64" OR "arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) + set(MI_ARCH "arm64") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|armv[34567].?|ARM)$") + set(MI_ARCH "arm32") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv|riscv32|riscv64)$") + if(CMAKE_SIZEOF_VOID_P==4) + set(MI_ARCH "riscv32") + else() + set(MI_ARCH "riscv64") + endif() +else() + set(MI_ARCH ${CMAKE_SYSTEM_PROCESSOR}) +endif() +message(STATUS "Architecture: ${MI_ARCH}") # (${CMAKE_SYSTEM_PROCESSOR}, ${CMAKE_GENERATOR_PLATFORM}, ${CMAKE_GENERATOR})") + +# negative overrides (mainly to support vcpkg features) +if(MI_NO_USE_CXX) + set(MI_USE_CXX "OFF") +endif() + +if(MI_NO_OPT_ARCH) + set(MI_OPT_ARCH "OFF") +elseif(MI_ARCH STREQUAL "arm64") + set(MI_OPT_ARCH "ON") # enable armv8.1-a by default on arm64 unless MI_NO_OPT_ARCH is set +endif() + +# ----------------------------------------------------------------------------- +# Enable the C++ compiler early on if needed +# ----------------------------------------------------------------------------- + +# clang-cl detection on windows +if(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_C_COMPILER_FRONTEND_VARIANT STREQUAL "MSVC") + set(MI_CLANG_CL "ON") +endif() + +# force C++ compilation with msvc or clang-cl to use modern C++ atomics +if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel" OR MI_CLANG_CL) + set(MI_USE_CXX "ON") +elseif(MI_DEBUG_UBSAN AND CMAKE_BUILD_TYPE MATCHES "Debug") # ubsan needs C++ + set(MI_USE_CXX "ON") +endif() + +# enable C++ ? 
+if(MI_USE_CXX) + enable_language(CXX) + message(STATUS "Use the C++ compiler to compile (MI_USE_CXX=ON)") + if(CMAKE_CXX_COMPILER_ID MATCHES "Intel" AND NOT CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM") + list(APPEND mi_cflags -Kc++) + endif() +endif() + + +# ----------------------------------------------------------------------------- +# Process options +# ----------------------------------------------------------------------------- + +# put -Wall early so other warnings can be disabled selectively +if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang") + if (MI_CLANG_CL) + list(APPEND mi_cflags -W) + else() + list(APPEND mi_cflags -Wall -Wextra -Wpedantic) + endif() +endif() +if(CMAKE_C_COMPILER_ID MATCHES "GNU") + list(APPEND mi_cflags -Wall -Wextra) +endif() +if(CMAKE_C_COMPILER_ID MATCHES "Intel") + list(APPEND mi_cflags -Wall) +endif() + +if(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang|Clang") + list(APPEND mi_cflags -Wno-deprecated) +endif() + +if(MI_OVERRIDE) + message(STATUS "Override standard malloc (MI_OVERRIDE=ON)") + if(APPLE) + if(MI_OSX_ZONE) + # use zone's on macOS + message(STATUS " Use malloc zone to override malloc (MI_OSX_ZONE=ON)") + list(APPEND mi_sources src/prim/osx/alloc-override-zone.c) + list(APPEND mi_defines MI_OSX_ZONE=1) + if (NOT MI_OSX_INTERPOSE) + message(STATUS " WARNING: zone overriding usually also needs interpose (use -DMI_OSX_INTERPOSE=ON)") + endif() + endif() + if(MI_OSX_INTERPOSE) + # use interpose on macOS + message(STATUS " Use interpose to override malloc (MI_OSX_INTERPOSE=ON)") + list(APPEND mi_defines MI_OSX_INTERPOSE=1) + if (NOT MI_OSX_ZONE) + message(STATUS " WARNING: interpose usually also needs zone overriding (use -DMI_OSX_ZONE=ON)") + endif() + endif() + if(MI_USE_CXX AND MI_OSX_INTERPOSE) + message(STATUS " WARNING: if dynamically overriding malloc/free, it is more reliable to build mimalloc as C code (use -DMI_USE_CXX=OFF)") + endif() + endif() +endif() + +if(WIN32) + if (NOT MI_WIN_REDIRECT) + # use a negative define for backward compatibility + list(APPEND mi_defines MI_WIN_NOREDIRECT=1) + endif() +endif() + +if(MI_SECURE) + message(STATUS "Set full secure build (MI_SECURE=ON)") + list(APPEND mi_defines MI_SECURE=4) +endif() + +if(MI_TRACK_VALGRIND) + CHECK_INCLUDE_FILES("valgrind/valgrind.h;valgrind/memcheck.h" MI_HAS_VALGRINDH) + if (NOT MI_HAS_VALGRINDH) + set(MI_TRACK_VALGRIND OFF) + message(WARNING "Cannot find the 'valgrind/valgrind.h' and 'valgrind/memcheck.h' -- install valgrind first?") + message(STATUS "Disabling Valgrind support (MI_TRACK_VALGRIND=OFF)") + else() + message(STATUS "Compile with Valgrind support (MI_TRACK_VALGRIND=ON)") + list(APPEND mi_defines MI_TRACK_VALGRIND=1) + endif() +endif() + +if(MI_TRACK_ASAN) + if (APPLE AND MI_OVERRIDE) + set(MI_TRACK_ASAN OFF) + message(WARNING "Cannot enable address sanitizer support on macOS if MI_OVERRIDE is ON (MI_TRACK_ASAN=OFF)") + endif() + if (MI_TRACK_VALGRIND) + set(MI_TRACK_ASAN OFF) + message(WARNING "Cannot enable address sanitizer support with also Valgrind support enabled (MI_TRACK_ASAN=OFF)") + endif() + if(MI_TRACK_ASAN) + CHECK_INCLUDE_FILES("sanitizer/asan_interface.h" MI_HAS_ASANH) + if (NOT MI_HAS_ASANH) + set(MI_TRACK_ASAN OFF) + message(WARNING "Cannot find the 'sanitizer/asan_interface.h' -- install address sanitizer support first") + message(STATUS "Compile **without** address sanitizer support (MI_TRACK_ASAN=OFF)") + else() + message(STATUS "Compile with address sanitizer support (MI_TRACK_ASAN=ON)") + list(APPEND mi_defines MI_TRACK_ASAN=1) + list(APPEND 
mi_cflags -fsanitize=address) + list(APPEND mi_libraries -fsanitize=address) + endif() + endif() +endif() + +if(MI_TRACK_ETW) + if(NOT WIN32) + set(MI_TRACK_ETW OFF) + message(WARNING "Can only enable ETW support on Windows (MI_TRACK_ETW=OFF)") + endif() + if (MI_TRACK_VALGRIND OR MI_TRACK_ASAN) + set(MI_TRACK_ETW OFF) + message(WARNING "Cannot enable ETW support with also Valgrind or ASAN support enabled (MI_TRACK_ETW=OFF)") + endif() + if(MI_TRACK_ETW) + message(STATUS "Compile with Windows event tracing support (MI_TRACK_ETW=ON)") + list(APPEND mi_defines MI_TRACK_ETW=1) + endif() +endif() + +if(MI_GUARDED) + message(STATUS "Compile guard pages behind certain object allocations (MI_GUARDED=ON)") + list(APPEND mi_defines MI_GUARDED=1) + if(NOT MI_NO_PADDING) + message(STATUS " Disabling padding due to guard pages (MI_NO_PADDING=ON)") + set(MI_NO_PADDING ON) + endif() +endif() + +if(MI_SEE_ASM) + message(STATUS "Generate assembly listings (MI_SEE_ASM=ON)") + list(APPEND mi_cflags -save-temps) + if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER 14) + message(STATUS "No GNU Line marker") + list(APPEND mi_cflags -Wno-gnu-line-marker) + endif() +endif() + +if(MI_CHECK_FULL) + message(STATUS "The MI_CHECK_FULL option is deprecated, use MI_DEBUG_FULL instead") + set(MI_DEBUG_FULL "ON") +endif() + +if (MI_SKIP_COLLECT_ON_EXIT) + message(STATUS "Skip collecting memory on program exit (MI_SKIP_COLLECT_ON_EXIT=ON)") + list(APPEND mi_defines MI_SKIP_COLLECT_ON_EXIT=1) +endif() + +if(MI_DEBUG_FULL) + message(STATUS "Set debug level to full internal invariant checking (MI_DEBUG_FULL=ON)") + list(APPEND mi_defines MI_DEBUG=3) # full invariant checking +endif() + +if(MI_NO_PADDING) + message(STATUS "Suppress any padding of heap blocks (MI_NO_PADDING=ON)") + list(APPEND mi_defines MI_PADDING=0) +else() + if(MI_PADDING) + message(STATUS "Enable explicit padding of heap blocks (MI_PADDING=ON)") + list(APPEND mi_defines MI_PADDING=1) + endif() +endif() + +if(MI_XMALLOC) + message(STATUS "Enable abort() calls on memory allocation failure (MI_XMALLOC=ON)") + list(APPEND mi_defines MI_XMALLOC=1) +endif() + +if(MI_SHOW_ERRORS) + message(STATUS "Enable printing of error and warning messages by default (MI_SHOW_ERRORS=ON)") + list(APPEND mi_defines MI_SHOW_ERRORS=1) +endif() + +if(MI_DEBUG_TSAN) + if(CMAKE_C_COMPILER_ID MATCHES "Clang") + message(STATUS "Build with thread sanitizer (MI_DEBUG_TSAN=ON)") + list(APPEND mi_defines MI_TSAN=1) + list(APPEND mi_cflags -fsanitize=thread -g -O1) + list(APPEND mi_libraries -fsanitize=thread) + else() + message(WARNING "Can only use thread sanitizer with clang (MI_DEBUG_TSAN=ON but ignored)") + endif() +endif() + +if(MI_DEBUG_UBSAN) + if(CMAKE_BUILD_TYPE MATCHES "Debug") + if(MI_USE_CXX) + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + message(STATUS "Build with undefined-behavior sanitizer (MI_DEBUG_UBSAN=ON)") + list(APPEND mi_defines MI_UBSAN=1) + list(APPEND mi_cflags -fsanitize=undefined -g -fno-sanitize-recover=undefined) + list(APPEND mi_libraries -fsanitize=undefined) + else() + message(WARNING "Can only use undefined-behavior sanitizer with clang++ (MI_DEBUG_UBSAN=ON but ignored)") + endif() + else() + message(WARNING "Can only use undefined-behavior sanitizer with a C++ build (MI_USE_CXX=ON)") + endif() + else() + message(WARNING "Can only use undefined-behavior sanitizer with a debug build (CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE})") + endif() +endif() + +if(CMAKE_SYSTEM_NAME MATCHES "Linux|Android") + if(MI_NO_THP) + 
message(STATUS "Disable transparent huge pages support (MI_NO_THP=ON)") + list(APPEND mi_defines MI_NO_THP=1) + endif() +endif() + +if(MI_LIBC_MUSL) + message(STATUS "Assume using musl libc (MI_LIBC_MUSL=ON)") + list(APPEND mi_defines MI_LIBC_MUSL=1) +endif() + +if(MI_WIN_USE_FLS) + message(STATUS "Use the Fiber API to detect thread termination (deprecated) (MI_WIN_USE_FLS=ON)") + list(APPEND mi_defines MI_WIN_USE_FLS=1) +endif() + +if(MI_WIN_USE_FIXED_TLS) + message(STATUS "Use fixed TLS slot on Windows to avoid extra tests in the malloc fast path (MI_WIN_USE_FIXED_TLS=ON)") + list(APPEND mi_defines MI_WIN_USE_FIXED_TLS=1) +endif() + +# Check /proc/cpuinfo for an SV39 MMU and limit the virtual address bits. +# (this will skip the aligned hinting in that case. Issue #939, #949) +if (EXISTS /proc/cpuinfo) + file(STRINGS /proc/cpuinfo mi_sv39_mmu REGEX "^mmu[ \t]+:[ \t]+sv39$") + if (mi_sv39_mmu) + MESSAGE( STATUS "Set virtual address bits to 39 (SV39 MMU detected)" ) + list(APPEND mi_defines MI_DEFAULT_VIRTUAL_ADDRESS_BITS=39) + endif() +endif() + +# On Haiku use `-DCMAKE_INSTALL_PREFIX` instead, issue #788 +# if(CMAKE_SYSTEM_NAME MATCHES "Haiku") +# SET(CMAKE_INSTALL_LIBDIR ~/config/non-packaged/lib) +# SET(CMAKE_INSTALL_INCLUDEDIR ~/config/non-packaged/headers) +# endif() + +# Compiler flags +if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU" AND NOT MI_CLANG_CL) + list(APPEND mi_cflags -Wno-unknown-pragmas -fvisibility=hidden) + if(NOT MI_USE_CXX) + list(APPEND mi_cflags -Wstrict-prototypes) + endif() + if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang") + list(APPEND mi_cflags -Wno-static-in-inline) + endif() +endif() + +if(CMAKE_C_COMPILER_ID MATCHES "Intel") + list(APPEND mi_cflags -fvisibility=hidden) +endif() + +if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM_NAME MATCHES "Haiku" AND NOT MI_CLANG_CL) + if(MI_LOCAL_DYNAMIC_TLS) + list(APPEND mi_cflags -ftls-model=local-dynamic) + else() + if(MI_LIBC_MUSL) + # with musl we use local-dynamic for the static build, see issue #644 + list(APPEND mi_cflags_static -ftls-model=local-dynamic) + list(APPEND mi_cflags_dynamic -ftls-model=initial-exec) + message(STATUS "Use local dynamic TLS for the static build (since MI_LIBC_MUSL=ON)") + else() + list(APPEND mi_cflags -ftls-model=initial-exec) + endif() + endif() +endif() + +if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel") + if(MI_OVERRIDE) + list(APPEND mi_cflags -fno-builtin-malloc) + endif() +endif() + +# Compiler and architecture specific flags +if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM_NAME MATCHES "Haiku") + if(MI_OPT_ARCH) + if(APPLE AND CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang" AND CMAKE_OSX_ARCHITECTURES) # to support multi-arch binaries (#999) + if("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) + list(APPEND MI_OPT_ARCH_FLAGS "-Xarch_arm64;-march=armv8.1-a") + endif() + if("x86_64" IN_LIST CMAKE_OSX_ARCHITECTURES) + list(APPEND MI_OPT_ARCH_FLAGS "-Xarch_x86_64;-march=haswell;-Xarch_x86_64;-mavx2") + endif() + elseif(MI_ARCH STREQUAL "x64") + set(MI_OPT_ARCH_FLAGS "-march=haswell;-mavx2") # fast bit scan (since 2013) + elseif(MI_ARCH STREQUAL "arm64") + set(MI_OPT_ARCH_FLAGS "-march=armv8.1-a") # fast atomics (since 2016) + endif() + endif() +endif() + +if (MSVC AND MSVC_VERSION GREATER_EQUAL 1914) # vs2017+ + list(APPEND mi_cflags /Zc:__cplusplus) + if(MI_OPT_ARCH AND NOT MI_CLANG_CL) + if(MI_ARCH STREQUAL "x64") + set(MI_OPT_ARCH_FLAGS "/arch:AVX2") + elseif(MI_ARCH STREQUAL "arm64") + 
set(MI_OPT_ARCH_FLAGS "/arch:armv8.1") + endif() + endif() +endif() + +if(MINGW) + add_definitions(-D_WIN32_WINNT=0x600) # issue #976 +endif() + +if(MI_OPT_ARCH_FLAGS) + list(APPEND mi_cflags ${MI_OPT_ARCH_FLAGS}) + message(STATUS "Architecture specific optimization is enabled (with ${MI_OPT_ARCH_FLAGS}) (MI_OPT_ARCH=ON)") + if (MI_OPT_SIMD) + list(APPEND mi_defines "MI_OPT_SIMD=1") + message(STATUS "SIMD instructions are enabled (MI_OPT_SIMD=ON)") + endif() +elseif(MI_OPT_SIMD) + message(STATUS "SIMD instructions are not enabled (either MI_OPT_ARCH=OFF or this architecture has no SIMD support)") +endif() + +# extra needed libraries + +# we prefer -l test over `find_library` as sometimes core libraries +# like `libatomic` are not on the system path (see issue #898) +function(find_link_library libname outlibname) + check_linker_flag(C "-l${libname}" mi_has_lib${libname}) + if (mi_has_lib${libname}) + message(VERBOSE "link library: -l${libname}") + set(${outlibname} ${libname} PARENT_SCOPE) + else() + find_library(MI_LIBPATH_${libname} ${libname}) + if (MI_LIBPATH_${libname}) + message(VERBOSE "link library ${libname} at ${MI_LIBPATH_${libname}}") + set(${outlibname} ${MI_LIBPATH_${libname}} PARENT_SCOPE) + else() + message(VERBOSE "link library not found: ${libname}") + set(${outlibname} "" PARENT_SCOPE) + endif() + endif() +endfunction() + +if(WIN32) + list(APPEND mi_libraries psapi shell32 user32 advapi32 bcrypt) +else() + find_link_library("pthread" MI_LIB_PTHREAD) + if(MI_LIB_PTHREAD) + list(APPEND mi_libraries "${MI_LIB_PTHREAD}") + endif() + find_link_library("rt" MI_LIB_RT) + if(MI_LIB_RT) + list(APPEND mi_libraries "${MI_LIB_RT}") + endif() + find_link_library("atomic" MI_LIB_ATOMIC) + if(MI_LIB_ATOMIC) + list(APPEND mi_libraries "${MI_LIB_ATOMIC}") + endif() +endif() + +# set language for source files now +if(MI_USE_CXX) + set_source_files_properties(${mi_sources} PROPERTIES LANGUAGE CXX ) + set_source_files_properties(src/static.c test/test-api.c test/test-api-fill test/test-stress PROPERTIES LANGUAGE CXX ) +endif() + + +# ----------------------------------------------------------------------------- +# Install and output names +# ----------------------------------------------------------------------------- + +# dynamic/shared library and symlinks always go to /usr/local/lib equivalent +# we use ${CMAKE_INSTALL_BINDIR} and ${CMAKE_INSTALL_LIBDIR}. + +# static libraries and object files, includes, and cmake config files +# are either installed at top level, or use versioned directories for side-by-side installation (default) +if (MI_INSTALL_TOPLEVEL) + set(mi_install_objdir "${CMAKE_INSTALL_LIBDIR}") + set(mi_install_incdir "${CMAKE_INSTALL_INCLUDEDIR}") + set(mi_install_cmakedir "${CMAKE_INSTALL_LIBDIR}/cmake/mimalloc") +else() + set(mi_install_objdir "${CMAKE_INSTALL_LIBDIR}/mimalloc-${mi_version}") # for static library and object files + set(mi_install_incdir "${CMAKE_INSTALL_INCLUDEDIR}/mimalloc-${mi_version}") # for includes + set(mi_install_cmakedir "${CMAKE_INSTALL_LIBDIR}/cmake/mimalloc-${mi_version}") # for cmake package info +endif() + +set(mi_libname "mimalloc") +if(MI_SECURE) + set(mi_libname "${mi_libname}-secure") +endif() +if(MI_TRACK_VALGRIND) + set(mi_libname "${mi_libname}-valgrind") +endif() +if(MI_TRACK_ASAN) + set(mi_libname "${mi_libname}-asan") +endif() +string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LC) +list(APPEND mi_defines "MI_CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE_LC}") #todo: multi-config project needs $ ? 
+if(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel|none)$") + list(APPEND mi_defines MI_BUILD_RELEASE) +else() + set(mi_libname "${mi_libname}-${CMAKE_BUILD_TYPE_LC}") #append build type (e.g. -debug) if not a release version +endif() + +if(MI_BUILD_SHARED) + list(APPEND mi_build_targets "shared") +endif() +if(MI_BUILD_STATIC) + list(APPEND mi_build_targets "static") +endif() +if(MI_BUILD_OBJECT) + list(APPEND mi_build_targets "object") +endif() +if(MI_BUILD_TESTS) + list(APPEND mi_build_targets "tests") +endif() + +message(STATUS "") +message(STATUS "Library name : ${mi_libname}") +message(STATUS "Version : ${mi_version}.${mi_version_patch}") +message(STATUS "Build type : ${CMAKE_BUILD_TYPE_LC}") +if(MI_USE_CXX) + message(STATUS "C++ Compiler : ${CMAKE_CXX_COMPILER}") +else() + message(STATUS "C Compiler : ${CMAKE_C_COMPILER}") +endif() +message(STATUS "Compiler flags : ${mi_cflags}") +message(STATUS "Compiler defines : ${mi_defines}") +message(STATUS "Link libraries : ${mi_libraries}") +message(STATUS "Build targets : ${mi_build_targets}") +message(STATUS "") + +# ----------------------------------------------------------------------------- +# Main targets +# ----------------------------------------------------------------------------- + +# shared library +if(MI_BUILD_SHARED) + add_library(mimalloc SHARED ${mi_sources}) + set_target_properties(mimalloc PROPERTIES VERSION ${mi_version} SOVERSION ${mi_version_major} OUTPUT_NAME ${mi_libname} ) + target_compile_definitions(mimalloc PRIVATE ${mi_defines} MI_SHARED_LIB MI_SHARED_LIB_EXPORT) + target_compile_options(mimalloc PRIVATE ${mi_cflags} ${mi_cflags_dynamic}) + target_link_libraries(mimalloc PRIVATE ${mi_libraries}) + target_include_directories(mimalloc PUBLIC + $ + $ + ) + install(TARGETS mimalloc EXPORT mimalloc ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) + install(EXPORT mimalloc DESTINATION ${mi_install_cmakedir}) + + if(WIN32 AND NOT MINGW) + # On windows, the import library name for the dll would clash with the static mimalloc.lib library + # so we postfix the dll import library with `.dll.lib` (and also the .pdb debug file) + set_property(TARGET mimalloc PROPERTY ARCHIVE_OUTPUT_NAME "${mi_libname}.dll" ) + install(FILES "$/${mi_libname}.dll.lib" DESTINATION ${CMAKE_INSTALL_LIBDIR}) + set_property(TARGET mimalloc PROPERTY PDB_NAME "${mi_libname}.dll") + # don't try to install the pdb since it may not be generated depending on the configuration + # install(FILES "$/${mi_libname}.dll.pdb" DESTINATION ${CMAKE_INSTALL_LIBDIR}) + endif() + if(WIN32 AND MI_WIN_REDIRECT) + if(MINGW) + set_property(TARGET mimalloc PROPERTY PREFIX "") + endif() + # On windows, link and copy the mimalloc redirection dll too. + if(CMAKE_GENERATOR_PLATFORM STREQUAL "arm64ec") + set(MIMALLOC_REDIRECT_SUFFIX "-arm64ec") + elseif(MI_ARCH STREQUAL "x64") + set(MIMALLOC_REDIRECT_SUFFIX "") + if(CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64") + message(STATUS "Note: x64 code emulated on Windows for arm64 should use an arm64ec build of 'mimalloc.dll'") + message(STATUS " together with 'mimalloc-redirect-arm64ec.dll'. See the 'bin\\readme.md' for more information.") + endif() + elseif(MI_ARCH STREQUAL "x86") + set(MIMALLOC_REDIRECT_SUFFIX "32") + else() + set(MIMALLOC_REDIRECT_SUFFIX "-${MI_ARCH}") # -arm64 etc. 
+ endif() + + target_link_libraries(mimalloc PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/bin/mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.lib) # the DLL import library + add_custom_command(TARGET mimalloc POST_BUILD + COMMAND "${CMAKE_COMMAND}" -E copy "${CMAKE_CURRENT_SOURCE_DIR}/bin/mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.dll" $ + COMMENT "Copy mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.dll to output directory") + install(FILES "$/mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.dll" DESTINATION ${CMAKE_INSTALL_BINDIR}) + endif() +endif() + + +# static library +if (MI_BUILD_STATIC) + add_library(mimalloc-static STATIC ${mi_sources}) + set_property(TARGET mimalloc-static PROPERTY OUTPUT_NAME ${mi_libname}) + set_property(TARGET mimalloc-static PROPERTY POSITION_INDEPENDENT_CODE ON) + target_compile_definitions(mimalloc-static PRIVATE ${mi_defines} MI_STATIC_LIB) + target_compile_options(mimalloc-static PRIVATE ${mi_cflags} ${mi_cflags_static}) + target_link_libraries(mimalloc-static PRIVATE ${mi_libraries}) + target_include_directories(mimalloc-static PUBLIC + $ + $ + ) + install(TARGETS mimalloc-static EXPORT mimalloc DESTINATION ${mi_install_objdir} LIBRARY) + install(EXPORT mimalloc DESTINATION ${mi_install_cmakedir}) +endif() + +# install include files +install(FILES include/mimalloc.h DESTINATION ${mi_install_incdir}) +install(FILES include/mimalloc-override.h DESTINATION ${mi_install_incdir}) +install(FILES include/mimalloc-new-delete.h DESTINATION ${mi_install_incdir}) +install(FILES include/mimalloc-stats.h DESTINATION ${mi_install_incdir}) +install(FILES cmake/mimalloc-config.cmake DESTINATION ${mi_install_cmakedir}) +install(FILES cmake/mimalloc-config-version.cmake DESTINATION ${mi_install_cmakedir}) + + +# single object file for more predictable static overriding +if (MI_BUILD_OBJECT) + add_library(mimalloc-obj OBJECT src/static.c) + set_property(TARGET mimalloc-obj PROPERTY POSITION_INDEPENDENT_CODE ON) + target_compile_definitions(mimalloc-obj PRIVATE ${mi_defines}) + target_compile_options(mimalloc-obj PRIVATE ${mi_cflags} ${mi_cflags_static}) + target_include_directories(mimalloc-obj PUBLIC + $ + $ + ) + + # Copy the generated object file (`static.o`) to the output directory (as `mimalloc.o`) + if(CMAKE_GENERATOR MATCHES "^Visual Studio.*$") + set(mimalloc-obj-static "${CMAKE_CURRENT_BINARY_DIR}/mimalloc-obj.dir/$/static${CMAKE_C_OUTPUT_EXTENSION}") + else() + set(mimalloc-obj-static "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/mimalloc-obj.dir/src/static.c${CMAKE_C_OUTPUT_EXTENSION}") + endif() + set(mimalloc-obj-out "${CMAKE_CURRENT_BINARY_DIR}/${mi_libname}${CMAKE_C_OUTPUT_EXTENSION}") + add_custom_command(OUTPUT ${mimalloc-obj-out} DEPENDS mimalloc-obj COMMAND "${CMAKE_COMMAND}" -E copy "${mimalloc-obj-static}" "${mimalloc-obj-out}") + add_custom_target(mimalloc-obj-target ALL DEPENDS ${mimalloc-obj-out}) + + + # the following seems to lead to cmake warnings/errors on some systems, disable for now :-( + # install(TARGETS mimalloc-obj EXPORT mimalloc DESTINATION ${mi_install_objdir}) + + # the FILES expression can also be: $ + # but that fails cmake versions less than 3.10 so we leave it as is for now + install(FILES ${mimalloc-obj-static} + DESTINATION ${mi_install_objdir} + RENAME ${mi_libname}${CMAKE_C_OUTPUT_EXTENSION} ) +endif() + + +# pkg-config file support +set(mi_pc_libraries "") +foreach(item IN LISTS mi_libraries) + if(item MATCHES " *[-].*") + set(mi_pc_libraries "${mi_pc_libraries} ${item}") + else() + set(mi_pc_libraries "${mi_pc_libraries} -l${item}") + endif() 
+endforeach() + +include("cmake/JoinPaths.cmake") +join_paths(mi_pc_includedir "\${prefix}" "${CMAKE_INSTALL_INCLUDEDIR}") +join_paths(mi_pc_libdir "\${prefix}" "${CMAKE_INSTALL_LIBDIR}") + +configure_file(mimalloc.pc.in mimalloc.pc @ONLY) +install(FILES "${CMAKE_CURRENT_BINARY_DIR}/mimalloc.pc" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/") + + + +# ----------------------------------------------------------------------------- +# API surface testing +# ----------------------------------------------------------------------------- + +if (MI_BUILD_TESTS) + enable_testing() + + # static link tests + foreach(TEST_NAME api api-fill stress) + add_executable(mimalloc-test-${TEST_NAME} test/test-${TEST_NAME}.c) + target_compile_definitions(mimalloc-test-${TEST_NAME} PRIVATE ${mi_defines}) + target_compile_options(mimalloc-test-${TEST_NAME} PRIVATE ${mi_cflags}) + target_include_directories(mimalloc-test-${TEST_NAME} PRIVATE include) + if(MI_BUILD_STATIC AND NOT MI_DEBUG_TSAN) + target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc-static ${mi_libraries}) + elseif(MI_BUILD_SHARED) + target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc ${mi_libraries}) + else() + message(STATUS "cannot build TSAN tests without MI_BUILD_SHARED being enabled") + endif() + add_test(NAME test-${TEST_NAME} COMMAND mimalloc-test-${TEST_NAME}) + endforeach() + + # dynamic override test + if(MI_BUILD_SHARED AND NOT (MI_TRACK_ASAN OR MI_DEBUG_TSAN OR MI_DEBUG_UBSAN)) + add_executable(mimalloc-test-stress-dynamic test/test-stress.c) + target_compile_definitions(mimalloc-test-stress-dynamic PRIVATE ${mi_defines} "USE_STD_MALLOC=1") + target_compile_options(mimalloc-test-stress-dynamic PRIVATE ${mi_cflags}) + target_include_directories(mimalloc-test-stress-dynamic PRIVATE include) + if(WIN32) + target_compile_definitions(mimalloc-test-stress-dynamic PRIVATE "MI_LINK_VERSION=1") # link mi_version + target_link_libraries(mimalloc-test-stress-dynamic PRIVATE mimalloc ${mi_libraries}) # link mi_version + add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_VERBOSE=1 $) + else() + target_link_libraries(mimalloc-test-stress-dynamic PRIVATE ${mi_libraries}) # pthreads, issue 1158 + if(APPLE) + set(LD_PRELOAD "DYLD_INSERT_LIBRARIES") + else() + set(LD_PRELOAD "LD_PRELOAD") + endif() + add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_VERBOSE=1 ${LD_PRELOAD}=$ $) + endif() + endif() +endif() + +# ----------------------------------------------------------------------------- +# Set override properties +# ----------------------------------------------------------------------------- +if (MI_OVERRIDE) + if (MI_BUILD_SHARED) + target_compile_definitions(mimalloc PRIVATE MI_MALLOC_OVERRIDE) + endif() + if(NOT WIN32) + # It is only possible to override malloc on Windows when building as a DLL. 
+ if (MI_BUILD_STATIC) + target_compile_definitions(mimalloc-static PRIVATE MI_MALLOC_OVERRIDE) + endif() + if (MI_BUILD_OBJECT) + target_compile_definitions(mimalloc-obj PRIVATE MI_MALLOC_OVERRIDE) + endif() + endif() +endif() diff --git a/3rd/mimalloc-2.0.9/LICENSE b/3rd/mimalloc-3.2.6/LICENSE similarity index 94% rename from 3rd/mimalloc-2.0.9/LICENSE rename to 3rd/mimalloc-3.2.6/LICENSE index 670b668a..53315ebe 100644 --- a/3rd/mimalloc-2.0.9/LICENSE +++ b/3rd/mimalloc-3.2.6/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2018-2021 Microsoft Corporation, Daan Leijen +Copyright (c) 2018-2025 Microsoft Corporation, Daan Leijen Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/3rd/mimalloc-3.2.6/SECURITY.md b/3rd/mimalloc-3.2.6/SECURITY.md new file mode 100644 index 00000000..b3c89efc --- /dev/null +++ b/3rd/mimalloc-3.2.6/SECURITY.md @@ -0,0 +1,41 @@ + + +## Security + +Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin). + +If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below. + +## Reporting Security Issues + +**Please do not report security vulnerabilities through public GitHub issues.** + +Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report). + +If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp). + +You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). + +Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: + + * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) + * Full paths of source file(s) related to the manifestation of the issue + * The location of the affected source code (tag/branch/commit or direct URL) + * Any special configuration required to reproduce the issue + * Step-by-step instructions to reproduce the issue + * Proof-of-concept or exploit code (if possible) + * Impact of the issue, including how an attacker might exploit the issue + +This information will help us triage your report more quickly. + +If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs. + +## Preferred Languages + +We prefer all communications to be in English. 
+ +## Policy + +Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd). + + diff --git a/3rd/mimalloc-3.2.6/azure-pipelines.yml b/3rd/mimalloc-3.2.6/azure-pipelines.yml new file mode 100644 index 00000000..3e3d700e --- /dev/null +++ b/3rd/mimalloc-3.2.6/azure-pipelines.yml @@ -0,0 +1,262 @@ +# Starter pipeline +# Start with a minimal pipeline that you can customize to build and deploy your code. +# Add steps that build, run tests, deploy, and more: +# https://aka.ms/yaml + +trigger: + branches: + include: + - main + - dev* + tags: + include: + - v* + +jobs: +- job: + displayName: Windows 2022 + pool: + vmImage: + windows-2022 + strategy: + matrix: + Debug: + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON + MSBuildConfiguration: Debug + Release: + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release + MSBuildConfiguration: Release + Release SIMD: + BuildType: release-simd + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_OPT_ARCH=ON -DMI_OPT_SIMD=ON -DMI_WIN_USE_FIXED_TLS=ON + MSBuildConfiguration: Release + Secure: + BuildType: secure + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON + MSBuildConfiguration: Release + Debug x86: + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -A Win32 + MSBuildConfiguration: Debug + Release x86: + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -A Win32 + MSBuildConfiguration: Release + Debug Fixed TLS: + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_WIN_USE_FIXED_TLS=ON + MSBuildConfiguration: Debug + Release Fixed TLS: + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_WIN_USE_FIXED_TLS=ON + MSBuildConfiguration: Release + steps: + - task: CMake@1 + inputs: + workingDirectory: $(BuildType) + cmakeArgs: .. 
$(cmakeExtraArgs) + - task: MSBuild@1 + inputs: + solution: $(BuildType)/libmimalloc.sln + configuration: '$(MSBuildConfiguration)' + msbuildArguments: -m + - script: ctest --verbose --timeout 240 -C $(MSBuildConfiguration) + workingDirectory: $(BuildType) + displayName: CTest + #- script: $(BuildType)\$(BuildType)\mimalloc-test-stress + # displayName: TestStress + #- upload: $(Build.SourcesDirectory)/$(BuildType) + # artifact: mimalloc-windows-$(BuildType) + +- job: + displayName: Ubuntu 22.04 + pool: + vmImage: + ubuntu-22.04 + strategy: + matrix: + Debug: + CC: gcc + CXX: g++ + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON + Release: + CC: gcc + CXX: g++ + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release + Secure: + CC: gcc + CXX: g++ + BuildType: secure + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON + Debug++: + CC: gcc + CXX: g++ + BuildType: debug-cxx + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON + Debug Clang: + CC: clang + CXX: clang++ + BuildType: debug-clang + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON + Release Clang: + CC: clang + CXX: clang++ + BuildType: release-clang + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release + Release SIMD Clang: + CC: clang + CXX: clang++ + BuildType: release-simd-clang + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_OPT_ARCH=ON -DMI_OPT_SIMD=ON + Secure Clang: + CC: clang + CXX: clang++ + BuildType: secure-clang + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON + Debug++ Clang: + CC: clang + CXX: clang++ + BuildType: debug-clang-cxx + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON + Debug ASAN Clang: + CC: clang + CXX: clang++ + BuildType: debug-asan-clang + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_TRACK_ASAN=ON + Debug UBSAN Clang: + CC: clang + CXX: clang++ + BuildType: debug-ubsan-clang + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_DEBUG_UBSAN=ON + Debug TSAN Clang++: + CC: clang + CXX: clang++ + BuildType: debug-tsan-clang-cxx + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=RelWithDebInfo -DMI_USE_CXX=ON -DMI_DEBUG_TSAN=ON + Debug Guarded Clang: + CC: clang + CXX: clang + BuildType: debug-guarded-clang + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=RelWithDebInfo -DMI_DEBUG_FULL=ON -DMI_GUARDED=ON + + steps: + - task: CMake@1 + inputs: + workingDirectory: $(BuildType) + cmakeArgs: .. $(cmakeExtraArgs) + - script: make -j$(nproc) -C $(BuildType) + displayName: Make + - script: ctest --verbose --timeout 240 + workingDirectory: $(BuildType) + displayName: CTest + env: + MIMALLOC_GUARDED_SAMPLE_RATE: 1000 +# - upload: $(Build.SourcesDirectory)/$(BuildType) +# artifact: mimalloc-ubuntu-$(BuildType) + +- job: + displayName: macOS 14 (Sonoma) + pool: + vmImage: + macOS-14 + strategy: + matrix: + Debug: + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON + Release: + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release + Release SIMD: + BuildType: release-simd + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_OPT_ARCH=ON -DMI_OPT_SIMD=ON + Secure: + BuildType: secure + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON + steps: + - task: CMake@1 + inputs: + workingDirectory: $(BuildType) + cmakeArgs: .. 
$(cmakeExtraArgs) + - script: make -j$(sysctl -n hw.ncpu) -C $(BuildType) + displayName: Make + - script: ctest --verbose --timeout 240 + workingDirectory: $(BuildType) + displayName: CTest +# - upload: $(Build.SourcesDirectory)/$(BuildType) +# artifact: mimalloc-macos-$(BuildType) + +# ---------------------------------------------------------- +# Other OS versions (just debug mode) +# ---------------------------------------------------------- + +- job: + displayName: Ubuntu 24.04 + pool: + vmImage: + ubuntu-24.04 + strategy: + matrix: + Debug: + CC: gcc + CXX: g++ + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON + Debug++: + CC: gcc + CXX: g++ + BuildType: debug-cxx + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON + Debug Clang: + CC: clang + CXX: clang++ + BuildType: debug-clang + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON + Debug++ Clang: + CC: clang + CXX: clang++ + BuildType: debug-clang-cxx + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON + Release Clang: + CC: clang + CXX: clang++ + BuildType: release-clang + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release + steps: + - task: CMake@1 + inputs: + workingDirectory: $(BuildType) + cmakeArgs: .. $(cmakeExtraArgs) + - script: make -j$(nproc) -C $(BuildType) + displayName: Make + - script: ctest --verbose --timeout 240 + workingDirectory: $(BuildType) + displayName: CTest + +- job: + displayName: macOS 15 (Sequoia) + pool: + vmImage: + macOS-15 + strategy: + matrix: + Debug: + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON + Release: + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release + steps: + - task: CMake@1 + inputs: + workingDirectory: $(BuildType) + cmakeArgs: .. $(cmakeExtraArgs) + - script: make -j$(sysctl -n hw.ncpu) -C $(BuildType) + displayName: Make + - script: ctest --verbose --timeout 240 + workingDirectory: $(BuildType) + displayName: CTest diff --git a/3rd/mimalloc-3.2.6/bin/readme.md b/3rd/mimalloc-3.2.6/bin/readme.md new file mode 100644 index 00000000..69eed411 --- /dev/null +++ b/3rd/mimalloc-3.2.6/bin/readme.md @@ -0,0 +1,118 @@ +# Windows Override + +We use a separate redirection DLL to override mimalloc on Windows +such that we redirect all malloc/free calls that go through the (dynamic) C runtime allocator, +including those from other DLL's or libraries. As it intercepts all allocation calls on a low level, +it can be used on large programs that include other 3rd party components. +There are four requirements to make the overriding work well: + +1. Use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). + +2. Link your program explicitly with the `mimalloc.dll.lib` export library for + the `mimalloc.dll` -- which contains all mimalloc functionality. + To ensure the `mimalloc.dll` is actually loaded at run-time it is easiest + to insert some call to the mimalloc API in the `main` function, like `mi_version()` + (or use the `/include:mi_version` switch on the linker, or + similarly, `#pragma comment(linker, "/include:mi_version")` in some source file). + See the `mimalloc-test-override` project for an example on how to use this. + +3. The `mimalloc-redirect.dll` must be put in the same folder as the main + `mimalloc.dll` at runtime (as it is a dependency of that DLL). + The redirection DLL ensures that all calls to the C runtime malloc API get + redirected to mimalloc functions (which reside in `mimalloc.dll`). + +4. 
Ensure the `mimalloc.dll` comes as early as possible in the import + list of the final executable (so it can intercept all potential allocations). + You can use `minject -l <exe>` to check this if needed. + +```csharp +┌──────────────┐ +│ Your Program │ +└────┬─────────┘ + │ + │ mi_version() ┌───────────────┐ ┌───────────────────────┐ + ├──────────────►│ mimalloc.dll ├────►│ mimalloc-redirect.dll │ + │ └──────┬────────┘ └───────────────────────┘ + │ ▼ + │ malloc() etc. ┌──────────────┐ + ├──────────────►│ ucrtbase.dll │ + │ └──────────────┘ + │ + │ + └──────────────► ... +``` + +For best performance on Windows with C++, it +is recommended to also override the `new`/`delete` operations (by including +[`mimalloc-new-delete.h`](../include/mimalloc-new-delete.h) +in a single(!) source file in your project). + +The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic +overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully +redirected. + +### Other Platforms + +You always link with `mimalloc.dll` but for different platforms you may +need a specific redirection DLL: + +- __x64__: `mimalloc-redirect.dll`. +- __x86__: `mimalloc-redirect32.dll`. Use for older 32-bit Windows programs. +- __arm64__: `mimalloc-redirect-arm64.dll`. Use for native Windows arm64 programs. +- __arm64ec__: `mimalloc-redirect-arm64ec.dll`. The [arm64ec] ABI is "emulation compatible" + mode on Windows arm64. Unfortunately we cannot run x64 code emulated on Windows arm64 with + the x64 mimalloc override directly (since the C runtime always uses `arm64ec`). Instead: + 1. Build the program as normal for x64 and link as normal with the x64 + `mimalloc.dll.lib` export library. + 2. Now separately build `mimalloc.dll` in `arm64ec` mode and _overwrite_ your + previous (x64) `mimalloc.dll` -- the loader can handle the mix of arm64ec + and x64 code. Now use `mimalloc-redirect-arm64ec.dll` to match your new + arm64ec `mimalloc.dll`. The main program stays as is and can be fully x64 + or contain more arm64ec modules. At runtime, the arm64ec `mimalloc.dll` will + run with native arm64 instructions while the rest of the program runs emulated x64. + +[arm64ec]: https://learn.microsoft.com/en-us/windows/arm/arm64ec + + +### Minject + +We cannot always re-link an executable with `mimalloc.dll`, and similarly, we +cannot always ensure that the DLL comes first in the import table of the final executable. +In many cases though we can patch existing executables without any recompilation +if they are linked with the dynamic C runtime (`ucrtbase.dll`) -- just put the +`mimalloc.dll` into the import table (and put `mimalloc-redirect.dll` in the same +directory). Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388). + +The `minject` program can also do this from the command line. +Use `minject --help` for options: + +``` +> minject --help + +minject: + Injects the mimalloc dll into the import table of a 64-bit executable, + and/or ensures that it comes first in the import table. + +usage: + > minject [options] <exe> + +options: + -h --help show this help + -v --verbose be verbose + -l --list only list imported modules + -i --inplace update the exe in-place (make sure there is a backup!) + -f --force always overwrite without prompting + --postfix=<postfix> use <postfix>
as a postfix to the mimalloc dll. + e.g. use --postfix=debug to link with mimalloc-debug.dll + +notes: + Without '--inplace' an injected is generated with the same name ending in '-mi'. + Ensure 'mimalloc-redirect.dll' is in the same folder as the mimalloc dll. + +examples: + > minject --list myprogram.exe + > minject --force --inplace myprogram.exe +``` + +For x86 32-bit binaries, use `minject32`, and for arm64 binaries use `minject-arm64`. + diff --git a/3rd/mimalloc-2.0.9/cmake/JoinPaths.cmake b/3rd/mimalloc-3.2.6/cmake/JoinPaths.cmake similarity index 100% rename from 3rd/mimalloc-2.0.9/cmake/JoinPaths.cmake rename to 3rd/mimalloc-3.2.6/cmake/JoinPaths.cmake diff --git a/3rd/mimalloc-2.0.9/cmake/mimalloc-config-version.cmake b/3rd/mimalloc-3.2.6/cmake/mimalloc-config-version.cmake similarity index 89% rename from 3rd/mimalloc-2.0.9/cmake/mimalloc-config-version.cmake rename to 3rd/mimalloc-3.2.6/cmake/mimalloc-config-version.cmake index 64d71053..aef56534 100644 --- a/3rd/mimalloc-2.0.9/cmake/mimalloc-config-version.cmake +++ b/3rd/mimalloc-3.2.6/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ -set(mi_version_major 2) -set(mi_version_minor 0) -set(mi_version_patch 9) +set(mi_version_major 3) +set(mi_version_minor 2) +set(mi_version_patch 6) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/3rd/mimalloc-2.0.9/cmake/mimalloc-config.cmake b/3rd/mimalloc-3.2.6/cmake/mimalloc-config.cmake similarity index 100% rename from 3rd/mimalloc-2.0.9/cmake/mimalloc-config.cmake rename to 3rd/mimalloc-3.2.6/cmake/mimalloc-config.cmake diff --git a/3rd/mimalloc-3.2.6/contrib/docker/alpine-arm32v7/Dockerfile b/3rd/mimalloc-3.2.6/contrib/docker/alpine-arm32v7/Dockerfile new file mode 100644 index 00000000..daa60f50 --- /dev/null +++ b/3rd/mimalloc-3.2.6/contrib/docker/alpine-arm32v7/Dockerfile @@ -0,0 +1,28 @@ +# install from an image +# download first an appropriate tar.gz image into the current directory +# from +FROM scratch + +# Substitute the image name that was downloaded +ADD alpine-minirootfs-20240329-armv7.tar.gz / + +# Install tools +RUN apk add build-base make cmake +RUN apk add git +RUN apk add vim + +RUN mkdir -p /home/dev +WORKDIR /home/dev + +# Get mimalloc +RUN git clone https://github.com/microsoft/mimalloc -b dev2 +RUN mkdir -p mimalloc/out/release +RUN mkdir -p mimalloc/out/debug + +# Build mimalloc debug +WORKDIR /home/dev/mimalloc/out/debug +RUN cmake ../.. -DMI_DEBUG_FULL=ON +RUN make -j +RUN make test + +CMD ["/bin/sh"] diff --git a/3rd/mimalloc-3.2.6/contrib/docker/alpine-x86/Dockerfile b/3rd/mimalloc-3.2.6/contrib/docker/alpine-x86/Dockerfile new file mode 100644 index 00000000..a0f76c17 --- /dev/null +++ b/3rd/mimalloc-3.2.6/contrib/docker/alpine-x86/Dockerfile @@ -0,0 +1,28 @@ +# install from an image +# download first an appropriate tar.gz image into the current directory +# from +FROM scratch + +# Substitute the image name that was downloaded +ADD alpine-minirootfs-20250108-x86.tar.gz / + +# Install tools +RUN apk add build-base make cmake +RUN apk add git +RUN apk add vim + +RUN mkdir -p /home/dev +WORKDIR /home/dev + +# Get mimalloc +RUN git clone https://github.com/microsoft/mimalloc -b dev2 +RUN mkdir -p mimalloc/out/release +RUN mkdir -p mimalloc/out/debug + +# Build mimalloc debug +WORKDIR /home/dev/mimalloc/out/debug +RUN cmake ../.. 
-DMI_DEBUG_FULL=ON +# RUN make -j +# RUN make test + +CMD ["/bin/sh"] diff --git a/3rd/mimalloc-3.2.6/contrib/docker/alpine/Dockerfile b/3rd/mimalloc-3.2.6/contrib/docker/alpine/Dockerfile new file mode 100644 index 00000000..e1234a9b --- /dev/null +++ b/3rd/mimalloc-3.2.6/contrib/docker/alpine/Dockerfile @@ -0,0 +1,23 @@ +# alpine image +FROM alpine + +# Install tools +RUN apk add build-base make cmake +RUN apk add git +RUN apk add vim + +RUN mkdir -p /home/dev +WORKDIR /home/dev + +# Get mimalloc +RUN git clone https://github.com/microsoft/mimalloc -b dev2 +RUN mkdir -p mimalloc/out/release +RUN mkdir -p mimalloc/out/debug + +# Build mimalloc debug +WORKDIR /home/dev/mimalloc/out/debug +RUN cmake ../.. -DMI_DEBUG_FULL=ON +RUN make -j +RUN make test + +CMD ["/bin/sh"] \ No newline at end of file diff --git a/3rd/mimalloc-3.2.6/contrib/docker/manylinux-x64/Dockerfile b/3rd/mimalloc-3.2.6/contrib/docker/manylinux-x64/Dockerfile new file mode 100644 index 00000000..ff54d674 --- /dev/null +++ b/3rd/mimalloc-3.2.6/contrib/docker/manylinux-x64/Dockerfile @@ -0,0 +1,23 @@ +FROM quay.io/pypa/manylinux2014_x86_64 + +# Install tools +RUN yum install -y openssl-devel +RUN yum install -y gcc gcc-c++ kernel-devel make +RUN yum install -y git cmake +RUN yum install -y vim + +RUN mkdir -p /home/dev +WORKDIR /home/dev + +# Get mimalloc +RUN git clone https://github.com/microsoft/mimalloc -b dev2 +RUN mkdir -p mimalloc/out/release +RUN mkdir -p mimalloc/out/debug + +# Build mimalloc debug +WORKDIR /home/dev/mimalloc/out/debug +RUN cmake ../.. -DMI_DEBUG_FULL=ON +RUN make -j +RUN make test + +CMD ["/bin/sh"] \ No newline at end of file diff --git a/3rd/mimalloc-3.2.6/contrib/docker/readme.md b/3rd/mimalloc-3.2.6/contrib/docker/readme.md new file mode 100644 index 00000000..b3d90094 --- /dev/null +++ b/3rd/mimalloc-3.2.6/contrib/docker/readme.md @@ -0,0 +1,10 @@ +Various example docker files used for testing. + +Usage: + +``` +> cd +> docker build -t -mimalloc . +> docker run -it -mimalloc +>> make test +``` diff --git a/3rd/mimalloc-3.2.6/contrib/vcpkg/portfile.cmake b/3rd/mimalloc-3.2.6/contrib/vcpkg/portfile.cmake new file mode 100644 index 00000000..e6df0f58 --- /dev/null +++ b/3rd/mimalloc-3.2.6/contrib/vcpkg/portfile.cmake @@ -0,0 +1,64 @@ +vcpkg_from_github( + OUT_SOURCE_PATH SOURCE_PATH + REPO microsoft/mimalloc + HEAD_REF master + + # The `REF` can be a commit hash, branch name (`dev3`), or a version (`v3.1.4`). + # Use `"v${VERSION}"` to use the version specified in `vcpkg.json`. + REF "v${VERSION}" + + # The sha512 is the hash of the tar.gz bundle. + # (To get the sha512, run `vcpkg install "mimalloc[override]" --overlay-ports=./contrib/vcpkg` and copy the sha from the error message.) 
+ SHA512 616351e549707318c1f8b164251141684a73d5bf8205b905736f48ab21fbb19bfaa4d52c4e63642fcb144345b6a5331944b6c8e0827925000553e46f2c2c31e9 +) + +vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS + FEATURES + c MI_NO_USE_CXX + guarded MI_GUARDED + secure MI_SECURE + override MI_OVERRIDE + optarch MI_OPT_ARCH + nooptarch MI_NO_OPT_ARCH + optsimd MI_OPT_SIMD + xmalloc MI_XMALLOC + asm MI_SEE_ASM +) +string(COMPARE EQUAL "${VCPKG_LIBRARY_LINKAGE}" "static" MI_BUILD_STATIC) +string(COMPARE EQUAL "${VCPKG_LIBRARY_LINKAGE}" "dynamic" MI_BUILD_SHARED) + +vcpkg_cmake_configure( + SOURCE_PATH "${SOURCE_PATH}" + OPTIONS + -DMI_USE_CXX=ON + -DMI_BUILD_TESTS=OFF + -DMI_BUILD_OBJECT=ON + -DMI_BUILD_STATIC=${MI_BUILD_STATIC} + -DMI_BUILD_SHARED=${MI_BUILD_SHARED} + -DMI_INSTALL_TOPLEVEL=ON + ${FEATURE_OPTIONS} +) + +vcpkg_cmake_install() +vcpkg_copy_pdbs() + +file(COPY + "${CMAKE_CURRENT_LIST_DIR}/vcpkg-cmake-wrapper.cmake" + "${CMAKE_CURRENT_LIST_DIR}/usage" + DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}" +) +vcpkg_cmake_config_fixup(CONFIG_PATH lib/cmake/mimalloc) + +if(VCPKG_LIBRARY_LINKAGE STREQUAL "dynamic") + # todo: why is this needed? + vcpkg_replace_string( + "${CURRENT_PACKAGES_DIR}/include/mimalloc.h" + "!defined(MI_SHARED_LIB)" + "0 // !defined(MI_SHARED_LIB)" + ) +endif() +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include") +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/share") + +vcpkg_fixup_pkgconfig() +vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/LICENSE") diff --git a/3rd/mimalloc-3.2.6/contrib/vcpkg/readme.md b/3rd/mimalloc-3.2.6/contrib/vcpkg/readme.md new file mode 100644 index 00000000..014f2867 --- /dev/null +++ b/3rd/mimalloc-3.2.6/contrib/vcpkg/readme.md @@ -0,0 +1,40 @@ +# Vcpkg support + +This directory is meant to provide the sources for the official [vcpkg port] +of mimalloc, but can also be used to override the official port with +your own variant. + +For example, you can edit the [`portfile.cmake`](portfile.cmake) +to check out a specific commit, version, or branch of mimalloc, or set further options. +You can install such custom port as: + +```sh +$ vcpkg install "mimalloc[override]" --recurse --overlay-ports=./contrib/vcpkg +``` + +This will also show the correct sha512 hash if you use a custom version. +Another way is to refer to the overlay from the [vcpkg-configuration.json](https://learn.microsoft.com/en-us/vcpkg/reference/vcpkg-configuration-json) file. +See also the vcpkg [documentation](https://learn.microsoft.com/en-us/vcpkg/produce/update-package-version) for more information. + + +# Using mimalloc from vcpkg + +When using [cmake with vcpkg](https://learn.microsoft.com/en-us/vcpkg/get_started/get-started?pivots=shell-powershell), +you can use mimalloc from the `CMakeLists.txt` as: + +```cmake +find_package(mimalloc CONFIG REQUIRED) +target_link_libraries(main PRIVATE mimalloc) +``` + +See [`test/CMakeLists.txt](../../test/CMakeLists.txt) for more examples. + + +# Acknowledgements + +The original port for vckpg was contributed by many people, including: @vicroms, @myd7349, @PhoubeHui, @LilyWangL, +@JonLiu1993, @RT2Code, Remy Tassoux, @wangao, @BillyONeal, @jiayuehua, @dg0yt, @gerar-ryan-immersaview, @nickdademo, +and @jimwang118 -- Thank you so much! 
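To make the `find_package` snippet above concrete, a minimal consumer `CMakeLists.txt` might look like the sketch below. The project name `myapp`, target `main`, and source file `main.c` are illustrative placeholders and not part of the port; the toolchain file path depends on where your vcpkg checkout lives.

```cmake
# Minimal consumer of the vcpkg-installed mimalloc package (sketch).
# Configure with the vcpkg toolchain, e.g.:
#   cmake -B build -S . -DCMAKE_TOOLCHAIN_FILE=<vcpkg-root>/scripts/buildsystems/vcpkg.cmake
cmake_minimum_required(VERSION 3.18)
project(myapp C)

find_package(mimalloc CONFIG REQUIRED)        # config package provided by this port

add_executable(main main.c)
target_link_libraries(main PRIVATE mimalloc)  # static or shared, depending on the triplet
```

Whether the `mimalloc` target resolves to the static or the shared library follows the linkage of the installed triplet, as handled by the `vcpkg-cmake-wrapper.cmake` shown below.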
+ + +[vcpkg port]: https://github.com/microsoft/vcpkg/tree/master/ports/mimalloc diff --git a/3rd/mimalloc-3.2.6/contrib/vcpkg/usage b/3rd/mimalloc-3.2.6/contrib/vcpkg/usage new file mode 100644 index 00000000..7ce649a7 --- /dev/null +++ b/3rd/mimalloc-3.2.6/contrib/vcpkg/usage @@ -0,0 +1,20 @@ +Use the following CMake targets to import mimalloc: + + find_package(mimalloc CONFIG REQUIRED) + target_link_libraries(main PRIVATE mimalloc) + +And use mimalloc in your sources as: + + #include <mimalloc.h> + #include <stdio.h> + int main(int argc, char** argv) { + int* p = mi_malloc_tp(int); + *p = mi_version(); + printf("mimalloc version: %d\n", *p); + mi_free(p); + return 0; + } + +When dynamically overriding on Windows, ensure `mimalloc.dll` is linked through some call to +mimalloc (e.g. `mi_version()`), and that the `mimalloc-redirect.dll` is in the same directory. +See https://github.com/microsoft/mimalloc/blob/dev/bin/readme.md for detailed information. diff --git a/3rd/mimalloc-3.2.6/contrib/vcpkg/vcpkg-cmake-wrapper.cmake b/3rd/mimalloc-3.2.6/contrib/vcpkg/vcpkg-cmake-wrapper.cmake new file mode 100644 index 00000000..1b355722 --- /dev/null +++ b/3rd/mimalloc-3.2.6/contrib/vcpkg/vcpkg-cmake-wrapper.cmake @@ -0,0 +1,20 @@ +_find_package(${ARGS}) + +if(CMAKE_CURRENT_LIST_DIR STREQUAL "${MIMALLOC_CMAKE_DIR}/${MIMALLOC_VERSION_DIR}") + set(MIMALLOC_INCLUDE_DIR "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/include") + # As in vcpkg.cmake + if(NOT DEFINED CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE MATCHES "^[Dd][Ee][Bb][Uu][Gg]$") + set(MIMALLOC_LIBRARY_DIR "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/debug/lib") + else() + set(MIMALLOC_LIBRARY_DIR "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib") + endif() + set(MIMALLOC_OBJECT_DIR "${MIMALLOC_LIBRARY_DIR}") + set(MIMALLOC_TARGET_DIR "${MIMALLOC_LIBRARY_DIR}") +endif() + +# vcpkg always configures either a static or dynamic library. +# ensure to always expose the mimalloc target as either the static or dynamic build.
+if(TARGET mimalloc-static AND NOT TARGET mimalloc) + add_library(mimalloc INTERFACE IMPORTED) + set_target_properties(mimalloc PROPERTIES INTERFACE_LINK_LIBRARIES mimalloc-static) +endif() diff --git a/3rd/mimalloc-3.2.6/contrib/vcpkg/vcpkg.json b/3rd/mimalloc-3.2.6/contrib/vcpkg/vcpkg.json new file mode 100644 index 00000000..9f5c2859 --- /dev/null +++ b/3rd/mimalloc-3.2.6/contrib/vcpkg/vcpkg.json @@ -0,0 +1,48 @@ +{ + "name": "mimalloc", + "version": "3.2.6", + "port-version": 0, + "description": "Compact general purpose allocator with excellent performance", + "homepage": "https://github.com/microsoft/mimalloc", + "license": "MIT", + "supports": "!uwp", + "dependencies": [ + { + "name": "vcpkg-cmake", + "host": true + }, + { + "name": "vcpkg-cmake-config", + "host": true + } + ], + "features": { + "c": { + "description": "Use C11 compilation (this can still override new/delete)" + }, + "override": { + "description": "Override the standard malloc/free interface" + }, + "secure": { + "description": "Use full security mitigations (like guard pages and randomization)" + }, + "guarded": { + "description": "Use build that support guard pages after objects controlled with MIMALLOC_GUARDED_SAMPLE_RATE" + }, + "xmalloc": { + "description": "If out-of-memory, call abort() instead of returning NULL" + }, + "optarch": { + "description": "Use architecture specific optimizations (on x64: '-march=haswell;-mavx2', on arm64: '-march=armv8.1-a')" + }, + "nooptarch": { + "description": "Do _not_ use architecture specific optimizations (on x64: '-march=haswell;-mavx2', on arm64: '-march=armv8.1-a')" + }, + "optsimd": { + "description": "Allow use of SIMD instructions (avx2 or neon) (requires 'optarch' to be enabled)" + }, + "asm": { + "description": "Generate assembly files" + } + } +} \ No newline at end of file diff --git a/3rd/mimalloc-2.0.9/doc/bench-2020/bench-c5-18xlarge-2020-01-20-a.svg b/3rd/mimalloc-3.2.6/doc/bench-2020/bench-c5-18xlarge-2020-01-20-a.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2020/bench-c5-18xlarge-2020-01-20-a.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2020/bench-c5-18xlarge-2020-01-20-a.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2020/bench-c5-18xlarge-2020-01-20-b.svg b/3rd/mimalloc-3.2.6/doc/bench-2020/bench-c5-18xlarge-2020-01-20-b.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2020/bench-c5-18xlarge-2020-01-20-b.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2020/bench-c5-18xlarge-2020-01-20-b.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2020/bench-c5-18xlarge-2020-01-20-rss-a.svg b/3rd/mimalloc-3.2.6/doc/bench-2020/bench-c5-18xlarge-2020-01-20-rss-a.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2020/bench-c5-18xlarge-2020-01-20-rss-a.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2020/bench-c5-18xlarge-2020-01-20-rss-a.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2020/bench-c5-18xlarge-2020-01-20-rss-b.svg b/3rd/mimalloc-3.2.6/doc/bench-2020/bench-c5-18xlarge-2020-01-20-rss-b.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2020/bench-c5-18xlarge-2020-01-20-rss-b.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2020/bench-c5-18xlarge-2020-01-20-rss-b.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2020/bench-r5a-1.svg b/3rd/mimalloc-3.2.6/doc/bench-2020/bench-r5a-1.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2020/bench-r5a-1.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2020/bench-r5a-1.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2020/bench-r5a-12xlarge-2020-01-16-a.svg 
b/3rd/mimalloc-3.2.6/doc/bench-2020/bench-r5a-12xlarge-2020-01-16-a.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2020/bench-r5a-12xlarge-2020-01-16-a.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2020/bench-r5a-12xlarge-2020-01-16-a.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2020/bench-r5a-12xlarge-2020-01-16-b.svg b/3rd/mimalloc-3.2.6/doc/bench-2020/bench-r5a-12xlarge-2020-01-16-b.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2020/bench-r5a-12xlarge-2020-01-16-b.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2020/bench-r5a-12xlarge-2020-01-16-b.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2020/bench-r5a-2.svg b/3rd/mimalloc-3.2.6/doc/bench-2020/bench-r5a-2.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2020/bench-r5a-2.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2020/bench-r5a-2.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2020/bench-r5a-rss-1.svg b/3rd/mimalloc-3.2.6/doc/bench-2020/bench-r5a-rss-1.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2020/bench-r5a-rss-1.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2020/bench-r5a-rss-1.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2020/bench-r5a-rss-2.svg b/3rd/mimalloc-3.2.6/doc/bench-2020/bench-r5a-rss-2.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2020/bench-r5a-rss-2.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2020/bench-r5a-rss-2.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2020/bench-spec-rss.svg b/3rd/mimalloc-3.2.6/doc/bench-2020/bench-spec-rss.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2020/bench-spec-rss.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2020/bench-spec-rss.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2020/bench-spec.svg b/3rd/mimalloc-3.2.6/doc/bench-2020/bench-spec.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2020/bench-spec.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2020/bench-spec.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2020/bench-z4-1.svg b/3rd/mimalloc-3.2.6/doc/bench-2020/bench-z4-1.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2020/bench-z4-1.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2020/bench-z4-1.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2020/bench-z4-2.svg b/3rd/mimalloc-3.2.6/doc/bench-2020/bench-z4-2.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2020/bench-z4-2.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2020/bench-z4-2.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2020/bench-z4-rss-1.svg b/3rd/mimalloc-3.2.6/doc/bench-2020/bench-z4-rss-1.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2020/bench-z4-rss-1.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2020/bench-z4-rss-1.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2020/bench-z4-rss-2.svg b/3rd/mimalloc-3.2.6/doc/bench-2020/bench-z4-rss-2.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2020/bench-z4-rss-2.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2020/bench-z4-rss-2.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2021/bench-amd5950x-2021-01-30-a.svg b/3rd/mimalloc-3.2.6/doc/bench-2021/bench-amd5950x-2021-01-30-a.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2021/bench-amd5950x-2021-01-30-a.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2021/bench-amd5950x-2021-01-30-a.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2021/bench-amd5950x-2021-01-30-b.svg b/3rd/mimalloc-3.2.6/doc/bench-2021/bench-amd5950x-2021-01-30-b.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2021/bench-amd5950x-2021-01-30-b.svg 
rename to 3rd/mimalloc-3.2.6/doc/bench-2021/bench-amd5950x-2021-01-30-b.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2021/bench-c5-18xlarge-2021-01-30-a.svg b/3rd/mimalloc-3.2.6/doc/bench-2021/bench-c5-18xlarge-2021-01-30-a.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2021/bench-c5-18xlarge-2021-01-30-a.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2021/bench-c5-18xlarge-2021-01-30-a.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2021/bench-c5-18xlarge-2021-01-30-b.svg b/3rd/mimalloc-3.2.6/doc/bench-2021/bench-c5-18xlarge-2021-01-30-b.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2021/bench-c5-18xlarge-2021-01-30-b.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2021/bench-c5-18xlarge-2021-01-30-b.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2021/bench-c5-18xlarge-2021-01-30-rss-a.svg b/3rd/mimalloc-3.2.6/doc/bench-2021/bench-c5-18xlarge-2021-01-30-rss-a.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2021/bench-c5-18xlarge-2021-01-30-rss-a.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2021/bench-c5-18xlarge-2021-01-30-rss-a.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2021/bench-c5-18xlarge-2021-01-30-rss-b.svg b/3rd/mimalloc-3.2.6/doc/bench-2021/bench-c5-18xlarge-2021-01-30-rss-b.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2021/bench-c5-18xlarge-2021-01-30-rss-b.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2021/bench-c5-18xlarge-2021-01-30-rss-b.svg diff --git a/3rd/mimalloc-2.0.9/doc/bench-2021/bench-macmini-2021-01-30.svg b/3rd/mimalloc-3.2.6/doc/bench-2021/bench-macmini-2021-01-30.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/bench-2021/bench-macmini-2021-01-30.svg rename to 3rd/mimalloc-3.2.6/doc/bench-2021/bench-macmini-2021-01-30.svg diff --git a/3rd/mimalloc-2.0.9/doc/doxyfile b/3rd/mimalloc-3.2.6/doc/doxyfile similarity index 80% rename from 3rd/mimalloc-2.0.9/doc/doxyfile rename to 3rd/mimalloc-3.2.6/doc/doxyfile index 73266f4e..53f874cf 100644 --- a/3rd/mimalloc-2.0.9/doc/doxyfile +++ b/3rd/mimalloc-3.2.6/doc/doxyfile @@ -1,4 +1,4 @@ -# Doxyfile 1.9.1 +# Doxyfile 1.11.0 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. @@ -12,6 +12,16 @@ # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). +# +# Note: +# +# Use doxygen to compare the used configuration file with the template +# configuration file: +# doxygen -x [configFile] +# Use doxygen to compare the used configuration file with the template +# configuration file without replacing the environment variables or CMake type +# replacement variables: +# doxygen -x_noenv [configFile] #--------------------------------------------------------------------------- # Project related configuration options @@ -38,7 +48,7 @@ PROJECT_NAME = mi-malloc # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 1.7/2.0 +PROJECT_NUMBER = 1.8/2.1 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a @@ -53,6 +63,12 @@ PROJECT_BRIEF = PROJECT_LOGO = mimalloc-logo.svg +# With the PROJECT_ICON tag one can specify an icon that is included in the tabs +# when the HTML document is shown. Doxygen will copy the logo to the output +# directory. 
+ +PROJECT_ICON = + # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is # entered, it will be relative to the location where doxygen was started. If @@ -60,16 +76,28 @@ PROJECT_LOGO = mimalloc-logo.svg OUTPUT_DIRECTORY = .. -# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- -# directories (in 2 levels) under the output directory of each output format and -# will distribute the generated files over these directories. Enabling this +# If the CREATE_SUBDIRS tag is set to YES then doxygen will create up to 4096 +# sub-directories (in 2 levels) under the output directory of each output format +# and will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise causes -# performance problems for the file system. +# performance problems for the file system. Adapt CREATE_SUBDIRS_LEVEL to +# control the number of sub-directories. # The default value is: NO. CREATE_SUBDIRS = NO +# Controls the number of sub-directories that will be created when +# CREATE_SUBDIRS tag is set to YES. Level 0 represents 16 directories, and every +# level increment doubles the number of directories, resulting in 4096 +# directories at level 8 which is the default and also the maximum value. The +# sub-directories are organized in 2 levels, the first level always has a fixed +# number of 16 directories. +# Minimum value: 0, maximum value: 8, default value: 8. +# This tag requires that the tag CREATE_SUBDIRS is set to YES. + +CREATE_SUBDIRS_LEVEL = 8 + # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode @@ -81,26 +109,18 @@ ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. -# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, -# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), -# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, -# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), -# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, -# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, -# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, -# Ukrainian and Vietnamese. +# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Bulgarian, +# Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch, English +# (United States), Esperanto, Farsi (Persian), Finnish, French, German, Greek, +# Hindi, Hungarian, Indonesian, Italian, Japanese, Japanese-en (Japanese with +# English messages), Korean, Korean-en (Korean with English messages), Latvian, +# Lithuanian, Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, +# Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, +# Swedish, Turkish, Ukrainian and Vietnamese. # The default value is: English. 
OUTPUT_LANGUAGE = English -# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all generated output in the proper direction. -# Possible values are: None, LTR, RTL and Context. -# The default value is: None. - -OUTPUT_TEXT_DIRECTION = None - # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. @@ -258,16 +278,16 @@ TAB_SIZE = 2 # the documentation. An alias has the form: # name=value # For example adding -# "sideeffect=@par Side Effects:\n" +# "sideeffect=@par Side Effects:^^" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading -# "Side Effects:". You can put \n's in the value part of an alias to insert -# newlines (in the resulting output). You can put ^^ in the value part of an -# alias to insert a newline as if a physical newline was in the original file. -# When you need a literal { or } or , in the value part of an alias you have to -# escape them by means of a backslash (\), this can lead to conflicts with the -# commands \{ and \} for these it is advised to use the version @{ and @} or use -# a double escape (\\{ and \\}) +# "Side Effects:". Note that you cannot put \n's in the value part of an alias +# to insert newlines (in the resulting output). You can put ^^ in the value part +# of an alias to insert a newline as if a physical newline was in the original +# file. When you need a literal { or } or , in the value part of an alias you +# have to escape them by means of a backslash (\), this can lead to conflicts +# with the commands \{ and \} for these it is advised to use the version @{ and +# @} or use a double escape (\\{ and \\}) ALIASES = @@ -312,8 +332,8 @@ OPTIMIZE_OUTPUT_SLICE = NO # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, JavaScript, -# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, VHDL, -# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: +# Csharp (C#), C, C++, Lex, D, PHP, md (Markdown), Objective-C, Python, Slice, +# VHDL, Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: # FortranFree, unknown formatted Fortran: Fortran. In the later case the parser # tries to guess whether the code is fixed or free formatted code, this is the # default for Fortran type files). For instance to make doxygen treat .inc files @@ -344,11 +364,22 @@ MARKDOWN_SUPPORT = YES # to that level are automatically included in the table of contents, even if # they do not have an id attribute. # Note: This feature currently applies only to Markdown headings. -# Minimum value: 0, maximum value: 99, default value: 5. +# Minimum value: 0, maximum value: 99, default value: 6. # This tag requires that the tag MARKDOWN_SUPPORT is set to YES. TOC_INCLUDE_HEADINGS = 0 +# The MARKDOWN_ID_STYLE tag can be used to specify the algorithm used to +# generate identifiers for the Markdown headings. Note: Every identifier is +# unique. 
+# Possible values are: DOXYGEN use a fixed 'autotoc_md' string followed by a +# sequence number starting at 0 and GITHUB use the lower case version of title +# with any whitespace replaced by '-' and punctuation characters removed. +# The default value is: DOXYGEN. +# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. + +MARKDOWN_ID_STYLE = DOXYGEN + # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word or @@ -361,8 +392,8 @@ AUTOLINK_SUPPORT = YES # to include (a tag file for) the STL sources as input, then you should set this # tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); -# versus func(std::string) {}). This also make the inheritance and collaboration -# diagrams that involve STL classes more complete and accurate. +# versus func(std::string) {}). This also makes the inheritance and +# collaboration diagrams that involve STL classes more complete and accurate. # The default value is: NO. BUILTIN_STL_SUPPORT = NO @@ -374,9 +405,9 @@ BUILTIN_STL_SUPPORT = NO CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: -# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen -# will parse them like normal C++ but will assume all classes use public instead -# of private inheritance when no explicit protection keyword is present. +# https://www.riverbankcomputing.com/software) sources only. Doxygen will parse +# them like normal C++ but will assume all classes use public instead of private +# inheritance when no explicit protection keyword is present. # The default value is: NO. SIP_SUPPORT = NO @@ -460,19 +491,27 @@ TYPEDEF_HIDES_STRUCT = YES LOOKUP_CACHE_SIZE = 0 -# The NUM_PROC_THREADS specifies the number threads doxygen is allowed to use +# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use # during processing. When set to 0 doxygen will based this on the number of # cores available in the system. You can set it explicitly to a value larger # than 0 to get more control over the balance between CPU load and processing # speed. At this moment only the input processing can be done using multiple # threads. Since this is still an experimental feature the default is set to 1, -# which efficively disables parallel processing. Please report any issues you +# which effectively disables parallel processing. Please report any issues you # encounter. Generating dot graphs in parallel is controlled by the # DOT_NUM_THREADS setting. # Minimum value: 0, maximum value: 32, default value: 1. NUM_PROC_THREADS = 1 +# If the TIMESTAMP tag is set different from NO then each generated page will +# contain the date or date and time when the page was generated. Setting this to +# NO can help when comparing the output of multiple runs. +# Possible values are: YES, NO, DATETIME and DATE. +# The default value is: NO. + +TIMESTAMP = NO + #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- @@ -554,7 +593,8 @@ HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. 
If set # to NO, these classes will be included in the various overviews. This option -# has no effect if EXTRACT_ALL is enabled. +# will also hide undocumented C++ concepts if enabled. This option has no effect +# if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO @@ -585,14 +625,15 @@ INTERNAL_DOCS = NO # filesystem is case sensitive (i.e. it supports files in the same directory # whose names only differ in casing), the option must be set to YES to properly # deal with such files in case they appear in the input. For filesystems that -# are not case sensitive the option should be be set to NO to properly deal with +# are not case sensitive the option should be set to NO to properly deal with # output files written for symbols that only differ in casing, such as for two # classes, one named CLASS and the other named Class, and to also support # references to files without having to specify the exact matching casing. On # Windows (including Cygwin) and MacOS, users should typically set this option # to NO, whereas on Linux or other Unix flavors it should typically be set to # YES. -# The default value is: system dependent. +# Possible values are: SYSTEM, NO and YES. +# The default value is: SYSTEM. CASE_SENSE_NAMES = NO @@ -610,6 +651,12 @@ HIDE_SCOPE_NAMES = NO HIDE_COMPOUND_REFERENCE= NO +# If the SHOW_HEADERFILE tag is set to YES then the documentation for a class +# will show which file needs to be included to use the class. +# The default value is: YES. + +SHOW_HEADERFILE = YES + # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. @@ -767,7 +814,8 @@ FILE_VERSION_FILTER = # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml -# will be used as the name of the layout file. +# will be used as the name of the layout file. See also section "Changing the +# layout of pages" for information. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE @@ -813,27 +861,50 @@ WARNINGS = YES WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some parameters -# in a documented function, or documenting parameters that don't exist or using -# markup commands wrongly. +# potential errors in the documentation, such as documenting some parameters in +# a documented function twice, or documenting parameters that don't exist or +# using markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES +# If WARN_IF_INCOMPLETE_DOC is set to YES, doxygen will warn about incomplete +# function parameter documentation. If set to NO, doxygen will accept that some +# parameters have no documentation without warning. +# The default value is: YES. + +WARN_IF_INCOMPLETE_DOC = YES + # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return -# value. If set to NO, doxygen will only warn about wrong or incomplete -# parameter documentation, but not about the absence of documentation. 
If -# EXTRACT_ALL is set to YES then this flag will automatically be disabled. +# value. If set to NO, doxygen will only warn about wrong parameter +# documentation, but not about the absence of documentation. If EXTRACT_ALL is +# set to YES then this flag will automatically be disabled. See also +# WARN_IF_INCOMPLETE_DOC # The default value is: NO. WARN_NO_PARAMDOC = NO +# If WARN_IF_UNDOC_ENUM_VAL option is set to YES, doxygen will warn about +# undocumented enumeration values. If set to NO, doxygen will accept +# undocumented enumeration values. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: NO. + +WARN_IF_UNDOC_ENUM_VAL = NO + # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when # a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS # then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but # at the end of the doxygen process doxygen will return with a non-zero status. -# Possible values are: NO, YES and FAIL_ON_WARNINGS. +# If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS_PRINT then doxygen behaves +# like FAIL_ON_WARNINGS but in case no WARN_LOGFILE is defined doxygen will not +# write the warning messages in between other messages but write them at the end +# of a run, in case a WARN_LOGFILE is defined the warning messages will be +# besides being in the defined file also be shown at the end of a run, unless +# the WARN_LOGFILE is defined as - i.e. standard output (stdout) in that case +# the behavior will remain as with the setting FAIL_ON_WARNINGS. +# Possible values are: NO, YES, FAIL_ON_WARNINGS and FAIL_ON_WARNINGS_PRINT. # The default value is: NO. WARN_AS_ERROR = NO @@ -844,13 +915,27 @@ WARN_AS_ERROR = NO # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) +# See also: WARN_LINE_FORMAT # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" +# In the $text part of the WARN_FORMAT command it is possible that a reference +# to a more specific place is given. To make it easier to jump to this place +# (outside of doxygen) the user can define a custom "cut" / "paste" string. +# Example: +# WARN_LINE_FORMAT = "'vi $file +$line'" +# See also: WARN_FORMAT +# The default value is: at line $line of file $file. + +WARN_LINE_FORMAT = "at line $line of file $file" + # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard -# error (stderr). +# error (stderr). In case the file specified cannot be opened for writing the +# warning and error messages are written to standard error. When as file - is +# specified the warning and error messages are written to standard output +# (stdout). WARN_LOGFILE = @@ -871,10 +956,21 @@ INPUT = mimalloc-doc.h # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: # https://www.gnu.org/software/libiconv/) for the list of possible encodings. +# See also: INPUT_FILE_ENCODING # The default value is: UTF-8. INPUT_ENCODING = UTF-8 +# This tag can be used to specify the character encoding of the source files +# that doxygen parses The INPUT_FILE_ENCODING tag can be used to specify +# character encoding on a per file pattern basis. 
Doxygen will compare the file +# name with each pattern and apply the encoding instead of the default +# INPUT_ENCODING) if there is a match. The character encodings are a list of the +# form: pattern=encoding (like *.php=ISO-8859-1). +# See also: INPUT_ENCODING for further information on supported encodings. + +INPUT_FILE_ENCODING = + # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. @@ -886,12 +982,12 @@ INPUT_ENCODING = UTF-8 # Note the list of default checked file patterns might differ from the list of # default file extension mappings. # -# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, -# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, -# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, -# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment), -# *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, *.vhdl, -# *.ucf, *.qsf and *.ice. +# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cxxm, +# *.cpp, *.cppm, *.ccm, *.c++, *.c++m, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, +# *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp, *.h++, *.ixx, *.l, *.cs, *.d, +# *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown, *.md, *.mm, *.dox (to +# be provided as doxygen C comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, +# *.f18, *.f, *.for, *.vhd, *.vhdl, *.ucf, *.qsf and *.ice. FILE_PATTERNS = *.c \ *.cc \ @@ -973,10 +1069,7 @@ EXCLUDE_PATTERNS = # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test -# -# Note that the wildcards are matched against the file with absolute path, so to -# exclude all test directories use the pattern */test/* +# ANamespace::AClass, ANamespace::*Test EXCLUDE_SYMBOLS = @@ -1021,6 +1114,11 @@ IMAGE_PATH = # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. # +# Note that doxygen will use the data processed and written to standard output +# for further processing, therefore nothing else, like debug statements or used +# commands (so in case of a Windows batch file always use @echo OFF), should be +# written to standard output. +# # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. @@ -1062,6 +1160,15 @@ FILTER_SOURCE_PATTERNS = USE_MDFILE_AS_MAINPAGE = +# The Fortran standard specifies that for fixed formatted Fortran code all +# characters from position 72 are to be considered as comment. A common +# extension is to allow longer lines before the automatic comment starts. The +# setting FORTRAN_COMMENT_AFTER will also make it possible that longer lines can +# be processed before the automatic comment starts. +# Minimum value: 7, maximum value: 10000, default value: 72. 
+ +FORTRAN_COMMENT_AFTER = 72 + #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- @@ -1076,7 +1183,8 @@ USE_MDFILE_AS_MAINPAGE = SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body of functions, -# classes and enums directly into the documentation. +# multi-line macros, enums or list initialized variables directly into the +# documentation. # The default value is: NO. INLINE_SOURCES = NO @@ -1159,9 +1267,11 @@ VERBATIM_HEADERS = YES CLANG_ASSISTED_PARSING = NO -# If clang assisted parsing is enabled and the CLANG_ADD_INC_PATHS tag is set to -# YES then doxygen will add the directory of each input to the include path. +# If the CLANG_ASSISTED_PARSING tag is set to YES and the CLANG_ADD_INC_PATHS +# tag is set to YES then doxygen will add the directory of each input to the +# include path. # The default value is: YES. +# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. CLANG_ADD_INC_PATHS = YES @@ -1197,10 +1307,11 @@ CLANG_DATABASE_PATH = ALPHABETICAL_INDEX = YES -# In case all classes in a project start with a common prefix, all classes will -# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag -# can be used to specify a prefix (or a list of prefixes) that should be ignored -# while generating the index headers. +# The IGNORE_PREFIX tag can be used to specify a prefix (or a list of prefixes) +# that should be ignored while generating the index headers. The IGNORE_PREFIX +# tag works for classes, function and member names. The entity will be placed in +# the alphabetical list under the first letter of the entity name that remains +# after removing the prefix. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = @@ -1279,7 +1390,12 @@ HTML_STYLESHEET = # Doxygen will copy the style sheet files to the output directory. # Note: The order of the extra style sheet files is of importance (e.g. the last # style sheet in the list overrules the setting of the previous ones in the -# list). For an example see the documentation. +# list). +# Note: Since the styling of scrollbars can currently not be overruled in +# Webkit/Chromium, the styling will be left out of the default doxygen.css if +# one or more extra stylesheets have been specified. So if scrollbar +# customization is desired it has to be added explicitly. For an example see the +# documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = mimalloc-doxygen.css @@ -1294,9 +1410,22 @@ HTML_EXTRA_STYLESHEET = mimalloc-doxygen.css HTML_EXTRA_FILES = +# The HTML_COLORSTYLE tag can be used to specify if the generated HTML output +# should be rendered with a dark or light theme. +# Possible values are: LIGHT always generates light mode output, DARK always +# generates dark mode output, AUTO_LIGHT automatically sets the mode according +# to the user preference, uses light mode if no preference is set (the default), +# AUTO_DARK automatically sets the mode according to the user preference, uses +# dark mode if no preference is set and TOGGLE allows a user to switch between +# light and dark mode via a button. +# The default value is: AUTO_LIGHT. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE = LIGHT + # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. 
Doxygen # will adjust the colors in the style sheet and background images according to -# this color. Hue is specified as an angle on a colorwheel, see +# this color. Hue is specified as an angle on a color-wheel, see # https://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. @@ -1306,7 +1435,7 @@ HTML_EXTRA_FILES = HTML_COLORSTYLE_HUE = 189 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors -# in the HTML output. For a value of 0 the output will use grayscales only. A +# in the HTML output. For a value of 0 the output will use gray-scales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. @@ -1324,15 +1453,6 @@ HTML_COLORSTYLE_SAT = 12 HTML_COLORSTYLE_GAMMA = 240 -# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML -# page will contain the date and time when the page was generated. Setting this -# to YES can help to show when doxygen was last run and thus if the -# documentation is up to date. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_TIMESTAMP = NO - # If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML # documentation will contain a main index with vertical navigation menus that # are dynamically created via JavaScript. If disabled, the navigation index will @@ -1352,6 +1472,33 @@ HTML_DYNAMIC_MENUS = NO HTML_DYNAMIC_SECTIONS = NO +# If the HTML_CODE_FOLDING tag is set to YES then classes and functions can be +# dynamically folded and expanded in the generated HTML source code. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_CODE_FOLDING = YES + +# If the HTML_COPY_CLIPBOARD tag is set to YES then doxygen will show an icon in +# the top right corner of code and text fragments that allows the user to copy +# its content to the clipboard. Note this only works if supported by the browser +# and the web page is served via a secure context (see: +# https://www.w3.org/TR/secure-contexts/), i.e. using the https: or file: +# protocol. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COPY_CLIPBOARD = YES + +# Doxygen stores a couple of settings persistently in the browser (via e.g. +# cookies). By default these settings apply to all HTML pages generated by +# doxygen across all projects. The HTML_PROJECT_COOKIE tag can be used to store +# the settings under a project specific key, such that the user preferences will +# be stored separately. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_PROJECT_COOKIE = + # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries # shown in the various tree structured indices initially; the user can expand # and collapse entries dynamically later on. Doxygen will expand the tree to @@ -1388,6 +1535,13 @@ GENERATE_DOCSET = NO DOCSET_FEEDNAME = "Doxygen generated docs" +# This tag determines the URL of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# This tag requires that the tag GENERATE_DOCSET is set to YES. 
+ +DOCSET_FEEDURL = + # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. @@ -1413,8 +1567,12 @@ DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop -# (see: -# https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows. +# on Windows. In the beginning of 2021 Microsoft took the original page, with +# a.o. the download links, offline the HTML help workshop was already many years +# in maintenance mode). You can download the HTML help workshop from the web +# archives at Installation executable (see: +# http://web.archive.org/web/20160201063255/http://download.microsoft.com/downlo +# ad/0/A/9/0A939EF6-E31C-430F-A3DF-DFAE7960D564/htmlhelp.exe). # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML @@ -1471,6 +1629,16 @@ BINARY_TOC = NO TOC_EXPAND = NO +# The SITEMAP_URL tag is used to specify the full URL of the place where the +# generated documentation will be placed on the server by the user during the +# deployment of the documentation. The generated sitemap is called sitemap.xml +# and placed on the directory specified by HTML_OUTPUT. In case no SITEMAP_URL +# is specified no sitemap is generated. For information about the sitemap +# protocol see https://www.sitemaps.org +# This tag requires that the tag GENERATE_HTML is set to YES. + +SITEMAP_URL = + # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that # can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help @@ -1573,16 +1741,28 @@ DISABLE_INDEX = YES # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can -# further fine-tune the look of the index. As an example, the default style -# sheet generated by doxygen has an example that shows how to put an image at -# the root of the tree instead of the PROJECT_NAME. Since the tree basically has -# the same information as the tab index, you could consider setting -# DISABLE_INDEX to YES when enabling this option. +# further fine tune the look of the index (see "Fine-tuning the output"). As an +# example, the default style sheet generated by doxygen has an example that +# shows how to put an image at the root of the tree instead of the PROJECT_NAME. +# Since the tree basically has the same information as the tab index, you could +# consider setting DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_TREEVIEW = YES +# When both GENERATE_TREEVIEW and DISABLE_INDEX are set to YES, then the +# FULL_SIDEBAR option determines if the side bar is limited to only the treeview +# area (value NO) or if it should extend to the full height of the window (value +# YES). Setting this to YES gives a layout similar to +# https://docs.readthedocs.io with more room for contents, but less room for the +# project logo, title, and description. 
If either GENERATE_TREEVIEW or +# DISABLE_INDEX is set to NO, this option has no effect. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FULL_SIDEBAR = NO + # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # @@ -1607,6 +1787,13 @@ TREEVIEW_WIDTH = 180 EXT_LINKS_IN_WINDOW = NO +# If the OBFUSCATE_EMAILS tag is set to YES, doxygen will obfuscate email +# addresses. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +OBFUSCATE_EMAILS = YES + # If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg # tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see # https://inkscape.org) to generate formulas as SVG images instead of PNGs for @@ -1627,17 +1814,6 @@ HTML_FORMULA_FORMAT = png FORMULA_FONTSIZE = 10 -# Use the FORMULA_TRANSPARENT tag to determine whether or not the images -# generated for formulas are transparent PNGs. Transparent PNGs are not -# supported properly for IE 6.0, but are supported on all modern browsers. -# -# Note that when changing this option you need to delete any form_*.png files in -# the HTML output directory before the changes have effect. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. - -FORMULA_TRANSPARENT = YES - # The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands # to create new LaTeX commands to be used in formulas as building blocks. See # the section "Including formulas" for details. @@ -1655,11 +1831,29 @@ FORMULA_MACROFILE = USE_MATHJAX = NO +# With MATHJAX_VERSION it is possible to specify the MathJax version to be used. +# Note that the different versions of MathJax have different requirements with +# regards to the different settings, so it is possible that also other MathJax +# settings have to be changed when switching between the different MathJax +# versions. +# Possible values are: MathJax_2 and MathJax_3. +# The default value is: MathJax_2. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_VERSION = MathJax_2 + # When MathJax is enabled you can set the default output format to be used for -# the MathJax output. See the MathJax site (see: -# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. +# the MathJax output. For more details about the output format see MathJax +# version 2 (see: +# http://docs.mathjax.org/en/v2.7-latest/output.html) and MathJax version 3 +# (see: +# http://docs.mathjax.org/en/latest/web/components/output.html). # Possible values are: HTML-CSS (which is slower, but has the best -# compatibility), NativeMML (i.e. MathML) and SVG. +# compatibility. This is the name for Mathjax version 2, for MathJax version 3 +# this will be translated into chtml), NativeMML (i.e. MathML. Only supported +# for MathJax 2. For MathJax version 3 chtml will be used instead.), chtml (This +# is the name for Mathjax version 3, for MathJax version 2 this will be +# translated into HTML-CSS) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. @@ -1672,15 +1866,21 @@ MATHJAX_FORMAT = HTML-CSS # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. 
However, it is strongly recommended to install a local copy of -# MathJax from https://www.mathjax.org before deployment. -# The default value is: https://cdn.jsdelivr.net/npm/mathjax@2. +# MathJax from https://www.mathjax.org before deployment. The default value is: +# - in case of MathJax version 2: https://cdn.jsdelivr.net/npm/mathjax@2 +# - in case of MathJax version 3: https://cdn.jsdelivr.net/npm/mathjax@3 # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. For example +# for MathJax version 2 (see +# https://docs.mathjax.org/en/v2.7-latest/tex.html#tex-and-latex-extensions): # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# For example for MathJax version 3 (see +# http://docs.mathjax.org/en/latest/input/tex/extensions/index.html): +# MATHJAX_EXTENSIONS = ams # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = @@ -1860,29 +2060,31 @@ PAPER_TYPE = a4 EXTRA_PACKAGES = -# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the -# generated LaTeX document. The header should contain everything until the first -# chapter. If it is left blank doxygen will generate a standard header. See -# section "Doxygen usage" for information on how to let doxygen write the -# default header to a separate file. +# The LATEX_HEADER tag can be used to specify a user-defined LaTeX header for +# the generated LaTeX document. The header should contain everything until the +# first chapter. If it is left blank doxygen will generate a standard header. It +# is highly recommended to start with a default header using +# doxygen -w latex new_header.tex new_footer.tex new_stylesheet.sty +# and then modify the file new_header.tex. See also section "Doxygen usage" for +# information on how to generate the default header that doxygen normally uses. # -# Note: Only use a user-defined header if you know what you are doing! The -# following commands have a special meaning inside the header: $title, -# $datetime, $date, $doxygenversion, $projectname, $projectnumber, -# $projectbrief, $projectlogo. Doxygen will replace $title with the empty -# string, for the replacement values of the other commands the user is referred -# to HTML_HEADER. +# Note: Only use a user-defined header if you know what you are doing! +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. The following +# commands have a special meaning inside the header (and footer): For a +# description of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_HEADER = -# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the -# generated LaTeX document. The footer should contain everything after the last -# chapter. If it is left blank doxygen will generate a standard footer. See +# The LATEX_FOOTER tag can be used to specify a user-defined LaTeX footer for +# the generated LaTeX document. The footer should contain everything after the +# last chapter. If it is left blank doxygen will generate a standard footer. See # LATEX_HEADER for more information on how to generate a default footer and what -# special commands can be used inside the footer. -# -# Note: Only use a user-defined footer if you know what you are doing! 
+# special commands can be used inside the footer. See also section "Doxygen +# usage" for information on how to generate the default footer that doxygen +# normally uses. Note: Only use a user-defined footer if you know what you are +# doing! # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_FOOTER = @@ -1925,10 +2127,16 @@ PDF_HYPERLINKS = YES USE_PDFLATEX = YES -# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode -# command to the generated LaTeX files. This will instruct LaTeX to keep running -# if errors occur, instead of asking the user for help. This option is also used -# when generating formulas in HTML. +# The LATEX_BATCHMODE tag signals the behavior of LaTeX in case of an error. +# Possible values are: NO same as ERROR_STOP, YES same as BATCH, BATCH In batch +# mode nothing is printed on the terminal, errors are scrolled as if is +# hit at every error; missing files that TeX tries to input or request from +# keyboard input (\read on a not open input stream) cause the job to abort, +# NON_STOP In nonstop mode the diagnostic message will appear on the terminal, +# but there is no possibility of user interaction just like in batch mode, +# SCROLL In scroll mode, TeX will stop only for missing files to input or if +# keyboard input is necessary and ERROR_STOP In errorstop mode, TeX will stop at +# each error, asking for user intervention. # The default value is: NO. # This tag requires that the tag GENERATE_LATEX is set to YES. @@ -1941,16 +2149,6 @@ LATEX_BATCHMODE = NO LATEX_HIDE_INDICES = NO -# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source -# code with syntax highlighting in the LaTeX output. -# -# Note that which sources are shown also depends on other settings such as -# SOURCE_BROWSER. -# The default value is: NO. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -LATEX_SOURCE_CODE = NO - # The LATEX_BIB_STYLE tag can be used to specify the style to use for the # bibliography, e.g. plainnat, or ieeetr. See # https://en.wikipedia.org/wiki/BibTeX and \cite for more info. @@ -1959,14 +2157,6 @@ LATEX_SOURCE_CODE = NO LATEX_BIB_STYLE = plain -# If the LATEX_TIMESTAMP tag is set to YES then the footer of each generated -# page will contain the date and time when the page was generated. Setting this -# to NO can help when comparing the output of multiple runs. -# The default value is: NO. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -LATEX_TIMESTAMP = NO - # The LATEX_EMOJI_DIRECTORY tag is used to specify the (relative or absolute) # path from which the emoji images will be read. If a relative path is entered, # it will be relative to the LATEX_OUTPUT directory. If left blank the @@ -2031,15 +2221,13 @@ RTF_STYLESHEET_FILE = RTF_EXTENSIONS_FILE = -# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code -# with syntax highlighting in the RTF output. -# -# Note that which sources are shown also depends on other settings such as -# SOURCE_BROWSER. -# The default value is: NO. +# The RTF_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the RTF_OUTPUT output directory. +# Note that the files will be copied as-is; there are no commands or markers +# available. # This tag requires that the tag GENERATE_RTF is set to YES. 
-RTF_SOURCE_CODE = NO +RTF_EXTRA_FILES = #--------------------------------------------------------------------------- # Configuration options related to the man page output @@ -2137,21 +2325,12 @@ GENERATE_DOCBOOK = NO DOCBOOK_OUTPUT = docbook -# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the -# program listings (including syntax highlighting and cross-referencing -# information) to the DOCBOOK output. Note that enabling this will significantly -# increase the size of the DOCBOOK output. -# The default value is: NO. -# This tag requires that the tag GENERATE_DOCBOOK is set to YES. - -DOCBOOK_PROGRAMLISTING = NO - #--------------------------------------------------------------------------- # Configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- # If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an -# AutoGen Definitions (see http://autogen.sourceforge.net/) file that captures +# AutoGen Definitions (see https://autogen.sourceforge.net/) file that captures # the structure of the code including all documentation. Note that this feature # is still experimental and incomplete at the moment. # The default value is: NO. @@ -2162,6 +2341,28 @@ GENERATE_AUTOGEN_DEF = NO # Configuration options related to Sqlite3 output #--------------------------------------------------------------------------- +# If the GENERATE_SQLITE3 tag is set to YES doxygen will generate a Sqlite3 +# database with symbols found by doxygen stored in tables. +# The default value is: NO. + +GENERATE_SQLITE3 = NO + +# The SQLITE3_OUTPUT tag is used to specify where the Sqlite3 database will be +# put. If a relative path is entered the value of OUTPUT_DIRECTORY will be put +# in front of it. +# The default directory is: sqlite3. +# This tag requires that the tag GENERATE_SQLITE3 is set to YES. + +SQLITE3_OUTPUT = sqlite3 + +# The SQLITE3_RECREATE_DB tag is set to YES, the existing doxygen_sqlite3.db +# database file will be recreated with each doxygen run. If set to NO, doxygen +# will warn if a database file is already found and not modify it. +# The default value is: YES. +# This tag requires that the tag GENERATE_SQLITE3 is set to YES. + +SQLITE3_RECREATE_DB = YES + #--------------------------------------------------------------------------- # Configuration options related to the Perl module output #--------------------------------------------------------------------------- @@ -2236,7 +2437,8 @@ SEARCH_INCLUDES = YES # The INCLUDE_PATH tag can be used to specify one or more directories that # contain include files that are not input files but should be processed by the -# preprocessor. +# preprocessor. Note that the INCLUDE_PATH is not recursive, so the setting of +# RECURSIVE has no effect here. # This tag requires that the tag SEARCH_INCLUDES is set to YES. INCLUDE_PATH = @@ -2303,15 +2505,15 @@ TAGFILES = GENERATE_TAGFILE = -# If the ALLEXTERNALS tag is set to YES, all external class will be listed in -# the class index. If set to NO, only the inherited external classes will be -# listed. +# If the ALLEXTERNALS tag is set to YES, all external classes and namespaces +# will be listed in the class and namespace index. If set to NO, only the +# inherited external classes will be listed. # The default value is: NO. ALLEXTERNALS = NO # If the EXTERNAL_GROUPS tag is set to YES, all external groups will be listed -# in the modules index. 
If set to NO, only the current project's groups will be +# in the topic index. If set to NO, only the current project's groups will be # listed. # The default value is: YES. @@ -2325,25 +2527,9 @@ EXTERNAL_GROUPS = YES EXTERNAL_PAGES = YES #--------------------------------------------------------------------------- -# Configuration options related to the dot tool +# Configuration options related to diagram generator tools #--------------------------------------------------------------------------- -# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram -# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to -# NO turns the diagrams off. Note that this option also works with HAVE_DOT -# disabled, but it is recommended to install and use dot, since it yields more -# powerful graphs. -# The default value is: YES. - -CLASS_DIAGRAMS = YES - -# You can include diagrams made with dia in doxygen documentation. Doxygen will -# then run dia to produce the diagram and insert it in the documentation. The -# DIA_PATH tag allows you to specify the directory where the dia binary resides. -# If left empty dia is assumed to be found in the default search path. - -DIA_PATH = - # If set to YES the inheritance and collaboration graphs will hide inheritance # and usage relations if the target is undocumented or is not a class. # The default value is: YES. @@ -2352,7 +2538,7 @@ HIDE_UNDOC_RELATIONS = YES # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is # available from the path. This tool is part of Graphviz (see: -# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent +# https://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent # Bell Labs. The other options in this section have no effect if this option is # set to NO # The default value is: NO. @@ -2369,49 +2555,77 @@ HAVE_DOT = NO DOT_NUM_THREADS = 0 -# When you want a differently looking font in the dot files that doxygen -# generates you can specify the font name using DOT_FONTNAME. You need to make -# sure dot is able to find the font, which can be done by putting it in a -# standard location or by setting the DOTFONTPATH environment variable or by -# setting DOT_FONTPATH to the directory containing the font. -# The default value is: Helvetica. +# DOT_COMMON_ATTR is common attributes for nodes, edges and labels of +# subgraphs. When you want a differently looking font in the dot files that +# doxygen generates you can specify fontname, fontcolor and fontsize attributes. +# For details please see Node, +# Edge and Graph Attributes specification You need to make sure dot is able +# to find the font, which can be done by putting it in a standard location or by +# setting the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the +# directory containing the font. Default graphviz fontsize is 14. +# The default value is: fontname=Helvetica,fontsize=10. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_FONTNAME = Helvetica +DOT_COMMON_ATTR = "fontname=Helvetica,fontsize=10" -# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of -# dot graphs. -# Minimum value: 4, maximum value: 24, default value: 10. +# DOT_EDGE_ATTR is concatenated with DOT_COMMON_ATTR. For elegant style you can +# add 'arrowhead=open, arrowtail=open, arrowsize=0.5'. Complete documentation about +# arrows shapes. +# The default value is: labelfontname=Helvetica,labelfontsize=10. 
# This tag requires that the tag HAVE_DOT is set to YES. -DOT_FONTSIZE = 10 +DOT_EDGE_ATTR = "labelfontname=Helvetica,labelfontsize=10" -# By default doxygen will tell dot to use the default font as specified with -# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set -# the path where dot can find it using this tag. +# DOT_NODE_ATTR is concatenated with DOT_COMMON_ATTR. For view without boxes +# around nodes set 'shape=plain' or 'shape=plaintext' Shapes specification +# The default value is: shape=box,height=0.2,width=0.4. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_NODE_ATTR = "shape=box,height=0.2,width=0.4" + +# You can set the path where dot can find font specified with fontname in +# DOT_COMMON_ATTR and others dot attributes. # This tag requires that the tag HAVE_DOT is set to YES. DOT_FONTPATH = -# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for -# each documented class showing the direct and indirect inheritance relations. -# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO. +# If the CLASS_GRAPH tag is set to YES or GRAPH or BUILTIN then doxygen will +# generate a graph for each documented class showing the direct and indirect +# inheritance relations. In case the CLASS_GRAPH tag is set to YES or GRAPH and +# HAVE_DOT is enabled as well, then dot will be used to draw the graph. In case +# the CLASS_GRAPH tag is set to YES and HAVE_DOT is disabled or if the +# CLASS_GRAPH tag is set to BUILTIN, then the built-in generator will be used. +# If the CLASS_GRAPH tag is set to TEXT the direct and indirect inheritance +# relations will be shown as texts / links. Explicit enabling an inheritance +# graph or choosing a different representation for an inheritance graph of a +# specific class, can be accomplished by means of the command \inheritancegraph. +# Disabling an inheritance graph can be accomplished by means of the command +# \hideinheritancegraph. +# Possible values are: NO, YES, TEXT, GRAPH and BUILTIN. # The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. CLASS_GRAPH = YES # If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a # graph for each documented class showing the direct and indirect implementation # dependencies (inheritance, containment, and class references variables) of the -# class with other documented classes. +# class with other documented classes. Explicit enabling a collaboration graph, +# when COLLABORATION_GRAPH is set to NO, can be accomplished by means of the +# command \collaborationgraph. Disabling a collaboration graph can be +# accomplished by means of the command \hidecollaborationgraph. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. COLLABORATION_GRAPH = YES # If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for -# groups, showing the direct groups dependencies. +# groups, showing the direct groups dependencies. Explicit enabling a group +# dependency graph, when GROUP_GRAPHS is set to NO, can be accomplished by means +# of the command \groupgraph. Disabling a directory graph can be accomplished by +# means of the command \hidegroupgraph. See also the chapter Grouping in the +# manual. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. @@ -2453,8 +2667,8 @@ DOT_UML_DETAILS = NO # The DOT_WRAP_THRESHOLD tag can be used to set the maximum number of characters # to display on a single line. 
If the actual line length exceeds this threshold -# significantly it will wrapped across multiple lines. Some heuristics are apply -# to avoid ugly line breaks. +# significantly it will be wrapped across multiple lines. Some heuristics are +# applied to avoid ugly line breaks. # Minimum value: 0, maximum value: 1000, default value: 17. # This tag requires that the tag HAVE_DOT is set to YES. @@ -2471,7 +2685,9 @@ TEMPLATE_RELATIONS = NO # If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to # YES then doxygen will generate a graph for each documented file showing the # direct and indirect include dependencies of the file with other documented -# files. +# files. Explicit enabling an include graph, when INCLUDE_GRAPH is is set to NO, +# can be accomplished by means of the command \includegraph. Disabling an +# include graph can be accomplished by means of the command \hideincludegraph. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. @@ -2480,7 +2696,10 @@ INCLUDE_GRAPH = YES # If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are # set to YES then doxygen will generate a graph for each documented file showing # the direct and indirect include dependencies of the file with other documented -# files. +# files. Explicit enabling an included by graph, when INCLUDED_BY_GRAPH is set +# to NO, can be accomplished by means of the command \includedbygraph. Disabling +# an included by graph can be accomplished by means of the command +# \hideincludedbygraph. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. @@ -2520,16 +2739,26 @@ GRAPHICAL_HIERARCHY = YES # If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the # dependencies a directory has on other directories in a graphical way. The # dependency relations are determined by the #include relations between the -# files in the directories. +# files in the directories. Explicit enabling a directory graph, when +# DIRECTORY_GRAPH is set to NO, can be accomplished by means of the command +# \directorygraph. Disabling a directory graph can be accomplished by means of +# the command \hidedirectorygraph. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. DIRECTORY_GRAPH = YES +# The DIR_GRAPH_MAX_DEPTH tag can be used to limit the maximum number of levels +# of child directories generated in directory dependency graphs by dot. +# Minimum value: 1, maximum value: 25, default value: 1. +# This tag requires that the tag DIRECTORY_GRAPH is set to YES. + +DIR_GRAPH_MAX_DEPTH = 1 + # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. For an explanation of the image formats see the section # output formats in the documentation of the dot tool (Graphviz (see: -# http://www.graphviz.org/)). +# https://www.graphviz.org/)). # Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order # to make the SVG files visible in IE 9+ (other browsers do not have this # requirement). @@ -2566,11 +2795,12 @@ DOT_PATH = DOTFILE_DIRS = -# The MSCFILE_DIRS tag can be used to specify one or more directories that -# contain msc files that are included in the documentation (see the \mscfile -# command). +# You can include diagrams made with dia in doxygen documentation. Doxygen will +# then run dia to produce the diagram and insert it in the documentation. The +# DIA_PATH tag allows you to specify the directory where the dia binary resides. 
+# If left empty dia is assumed to be found in the default search path. -MSCFILE_DIRS = +DIA_PATH = # The DIAFILE_DIRS tag can be used to specify one or more directories that # contain dia files that are included in the documentation (see the \diafile @@ -2579,10 +2809,10 @@ MSCFILE_DIRS = DIAFILE_DIRS = # When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the -# path where java can find the plantuml.jar file. If left blank, it is assumed -# PlantUML is not used or called during a preprocessing step. Doxygen will -# generate a warning when it encounters a \startuml command in this case and -# will not generate output for the diagram. +# path where java can find the plantuml.jar file or to the filename of jar file +# to be used. If left blank, it is assumed PlantUML is not used or called during +# a preprocessing step. Doxygen will generate a warning when it encounters a +# \startuml command in this case and will not generate output for the diagram. PLANTUML_JAR_PATH = @@ -2599,7 +2829,7 @@ PLANTUML_INCLUDE_PATH = # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes # that will be shown in the graph. If the number of nodes in a graph becomes # larger than this value, doxygen will truncate the graph, which is visualized -# by representing a node as a red box. Note that doxygen if the number of direct +# by representing a node as a red box. Note that if the number of direct # children of the root node in a graph is already larger than # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note that # the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. @@ -2620,18 +2850,6 @@ DOT_GRAPH_MAX_NODES = 50 MAX_DOT_GRAPH_DEPTH = 0 -# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent -# background. This is disabled by default, because dot on Windows does not seem -# to support this out of the box. -# -# Warning: Depending on the platform used, enabling this option may lead to -# badly anti-aliased labels on the edges of a graph (i.e. they become hard to -# read). -# The default value is: NO. -# This tag requires that the tag HAVE_DOT is set to YES. - -DOT_TRANSPARENT = NO - # Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). This # makes dot run faster, but since only newer versions of dot (>1.8.10) support @@ -2644,6 +2862,8 @@ DOT_MULTI_TARGETS = NO # If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page # explaining the meaning of the various boxes and arrows in the dot generated # graphs. +# Note: This tag requires that UML_LOOK isn't set, i.e. the doxygen internal +# graphical representation for inheritance and collaboration diagrams is used. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. @@ -2652,8 +2872,24 @@ GENERATE_LEGEND = YES # If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate # files that are used to generate the various graphs. # -# Note: This setting is not only used for dot files but also for msc and -# plantuml temporary files. +# Note: This setting is not only used for dot files but also for msc temporary +# files. # The default value is: YES. DOT_CLEANUP = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. If the MSCGEN_TOOL tag is left empty (the default), then doxygen will +# use a built-in version of mscgen tool to produce the charts. 
Alternatively, +# the MSCGEN_TOOL tag can also specify the name an external tool. For instance, +# specifying prog as the value, doxygen will call the tool as prog -T +# -o . The external tool should support +# output file formats "png", "eps", "svg", and "ismap". + +MSCGEN_TOOL = + +# The MSCFILE_DIRS tag can be used to specify one or more directories that +# contain msc files that are included in the documentation (see the \mscfile +# command). + +MSCFILE_DIRS = diff --git a/3rd/mimalloc-2.0.9/doc/ds-logo.jpg b/3rd/mimalloc-3.2.6/doc/ds-logo.jpg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/ds-logo.jpg rename to 3rd/mimalloc-3.2.6/doc/ds-logo.jpg diff --git a/3rd/mimalloc-2.0.9/doc/ds-logo.png b/3rd/mimalloc-3.2.6/doc/ds-logo.png similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/ds-logo.png rename to 3rd/mimalloc-3.2.6/doc/ds-logo.png diff --git a/3rd/mimalloc-2.0.9/doc/mimalloc-doc.h b/3rd/mimalloc-3.2.6/doc/mimalloc-doc.h similarity index 70% rename from 3rd/mimalloc-2.0.9/doc/mimalloc-doc.h rename to 3rd/mimalloc-3.2.6/doc/mimalloc-doc.h index 38f7f7a8..bd86a6cd 100644 --- a/3rd/mimalloc-2.0.9/doc/mimalloc-doc.h +++ b/3rd/mimalloc-3.2.6/doc/mimalloc-doc.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021, Microsoft Research, Daan Leijen +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -25,12 +25,15 @@ without code changes, for example, on Unix you can use it as: ``` Notable aspects of the design include: - - __small and consistent__: the library is about 8k LOC using simple and consistent data structures. This makes it very suitable to integrate and adapt in other projects. For runtime systems it provides hooks for a monotonic _heartbeat_ and deferred freeing (for bounded worst-case times with reference counting). + Partly due to its simplicity, mimalloc has been ported to many systems (Windows, macOS, + Linux, WASM, various BSD's, Haiku, MUSL, etc) and has excellent support for dynamic overriding. + At the same time, it is an industrial strength allocator that runs (very) large scale + distributed services on thousands of machines with excellent worst case latencies. - __free list sharding__: instead of one big free list (per size class) we have many smaller lists per "mimalloc page" which reduces fragmentation and increases locality -- @@ -45,23 +48,23 @@ Notable aspects of the design include: and the chance of contending on a single location will be low -- this is quite similar to randomized algorithms like skip lists where adding a random oracle removes the need for a more complex algorithm. -- __eager page reset__: when a "page" becomes empty (with increased chance - due to free list sharding) the memory is marked to the OS as unused ("reset" or "purged") +- __eager page purging__: when a "page" becomes empty (with increased chance + due to free list sharding) the memory is marked to the OS as unused (reset or decommitted) reducing (real) memory pressure and fragmentation, especially in long running programs. -- __secure__: _mimalloc_ can be build in secure mode, adding guard pages, +- __secure__: _mimalloc_ can be built in secure mode, adding guard pages, randomized allocation, encrypted free lists, etc. to protect against various - heap vulnerabilities. 
The performance penalty is only around 5% on average + heap vulnerabilities. The performance penalty is usually around 10% on average over our benchmarks. - __first-class heaps__: efficiently create and use multiple heaps to allocate across different regions. A heap can be destroyed at once instead of deallocating each object separately. - __bounded__: it does not suffer from _blowup_ \[1\], has bounded worst-case allocation - times (_wcat_), bounded space overhead (~0.2% meta-data, with low internal fragmentation), - and has no internal points of contention using only atomic operations. -- __fast__: In our benchmarks (see [below](#performance)), - _mimalloc_ outperforms all other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), - and usually uses less memory (up to 25% more in the worst case). A nice property - is that it does consistently well over a wide range of benchmarks. + times (_wcat_) (upto OS primitives), bounded space overhead (~0.2% meta-data, with low + internal fragmentation), and has no internal points of contention using only atomic operations. +- __fast__: In our benchmarks (see [below](#bench)), + _mimalloc_ outperforms other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), + and often uses less memory. A nice property is that it does consistently well over a wide range + of benchmarks. There is also good huge OS page support for larger server programs. You can read more on the design of _mimalloc_ in the [technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action) @@ -168,7 +171,7 @@ void* mi_expand(void* p, size_t newsize); /// @returns A pointer to a block of \a count * \a size bytes, or \a NULL /// if out of memory or if \a count * \a size overflows. /// -/// If there is no overflow, it behaves exactly like `mi_malloc(p,count*size)`. +/// If there is no overflow, it behaves exactly like `mi_malloc(count*size)`. /// @see mi_calloc() /// @see mi_zallocn() void* mi_mallocn(size_t count, size_t size); @@ -278,8 +281,7 @@ void* mi_zalloc_small(size_t size); /// The returned size can be /// used to call \a mi_expand successfully. /// The returned size is always at least equal to the -/// allocated size of \a p, and, in the current design, -/// should be less than 16.7% more. +/// allocated size of \a p. /// /// @see [_msize](https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/msize?view=vs-2017) (Windows) /// @see [malloc_usable_size](http://man7.org/linux/man-pages/man3/malloc_usable_size.3.html) (Linux) @@ -304,7 +306,7 @@ size_t mi_good_size(size_t size); /// in very narrow circumstances; in particular, when a long running thread /// allocates a lot of blocks that are freed by other threads it may improve /// resource usage by calling this every once in a while. -void mi_collect(bool force); +void mi_collect(bool force); /// Deprecated /// @param out Ignored, outputs to the registered output function or stderr by default. @@ -428,20 +430,19 @@ int mi_reserve_os_memory(size_t size, bool commit, bool allow_large); /// allocated in some manner and available for use my mimalloc. /// @param start Start of the memory area /// @param size The size of the memory area. -/// @param commit Is the area already committed? -/// @param is_large Does it consist of large OS pages? Set this to \a true as well for memory -/// that should not be decommitted or protected (like rdma etc.) +/// @param is_committed Is the area already committed? +/// @param is_pinned Can the memory not be decommitted or reset? 
(usually the case for large OS pages) /// @param is_zero Does the area consists of zero's? /// @param numa_node Possible associated numa node or `-1`. /// @return \a true if successful, and \a false on error. -bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node); +bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_pinned, bool is_zero, int numa_node); /// Reserve \a pages of huge OS pages (1GiB) evenly divided over \a numa_nodes nodes, /// but stops after at most `timeout_msecs` seconds. /// @param pages The number of 1GiB pages to reserve. /// @param numa_nodes The number of nodes do evenly divide the pages over, or 0 for using the actual number of NUMA nodes. /// @param timeout_msecs Maximum number of milli-seconds to try reserving, or 0 for no timeout. -/// @returns 0 if successfull, \a ENOMEM if running out of memory, or \a ETIMEDOUT if timed out. +/// @returns 0 if successful, \a ENOMEM if running out of memory, or \a ETIMEDOUT if timed out. /// /// The reserved memory is used by mimalloc to satisfy allocations. /// May quit before \a timeout_msecs are expired if it estimates it will take more than @@ -453,9 +454,9 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t /// Reserve \a pages of huge OS pages (1GiB) at a specific \a numa_node, /// but stops after at most `timeout_msecs` seconds. /// @param pages The number of 1GiB pages to reserve. -/// @param numa_node The NUMA node where the memory is reserved (start at 0). +/// @param numa_node The NUMA node where the memory is reserved (start at 0). Use -1 for no affinity. /// @param timeout_msecs Maximum number of milli-seconds to try reserving, or 0 for no timeout. -/// @returns 0 if successfull, \a ENOMEM if running out of memory, or \a ETIMEDOUT if timed out. +/// @returns 0 if successful, \a ENOMEM if running out of memory, or \a ETIMEDOUT if timed out. /// /// The reserved memory is used by mimalloc to satisfy allocations. /// May quit before \a timeout_msecs are expired if it estimates it will take more than @@ -468,7 +469,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec /// Is the C runtime \a malloc API redirected? /// @returns \a true if all malloc API calls are redirected to mimalloc. /// -/// Currenty only used on Windows. +/// Currently only used on Windows. bool mi_is_redirected(); /// Return process information (time and memory usage). @@ -486,6 +487,91 @@ bool mi_is_redirected(); /// on other systems as the amount of read/write accessible memory reserved by mimalloc. void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults); +/// @brief Show all current arena's. +/// @param show_inuse Show the arena blocks that are in use. +/// @param show_abandoned Show the abandoned arena blocks. +/// @param show_purge Show arena blocks scheduled for purging. +void mi_debug_show_arenas(bool show_inuse, bool show_abandoned, bool show_purge); + +/// Mimalloc uses large (virtual) memory areas, called "arena"s, from the OS to manage its memory. +/// Each arena has an associated identifier. +typedef int mi_arena_id_t; + +/// @brief Return the size of an arena. +/// @param arena_id The arena identifier. +/// @param size Returned size in bytes of the (virtual) arena area. +/// @return base address of the arena. 
+void* mi_arena_area(mi_arena_id_t arena_id, size_t* size); + +/// @brief Reserve huge OS pages (1GiB) into a single arena. +/// @param pages Number of 1GiB pages to reserve. +/// @param numa_node The associated NUMA node, or -1 for no NUMA preference. +/// @param timeout_msecs Max amount of milli-seconds this operation is allowed to take. (0 is infinite) +/// @param exclusive If exclusive, only a heap associated with this arena can allocate in it. +/// @param arena_id The arena identifier. +/// @return 0 if successful, \a ENOMEM if running out of memory, or \a ETIMEDOUT if timed out. +int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id); + +/// @brief Reserve OS memory to be managed in an arena. +/// @param size Size the reserve. +/// @param commit Should the memory be initially committed? +/// @param allow_large Allow the use of large OS pages? +/// @param exclusive Is the returned arena exclusive? +/// @param arena_id The new arena identifier. +/// @return Zero on success, an error code otherwise. +int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id); + +/// @brief Manage externally allocated memory as a mimalloc arena. This memory will not be freed by mimalloc. +/// @param start Start address of the area. +/// @param size Size in bytes of the area. +/// @param is_committed Is the memory already committed? +/// @param is_large Does it consist of (pinned) large OS pages? +/// @param is_zero Is the memory zero-initialized? +/// @param numa_node Associated NUMA node, or -1 to have no NUMA preference. +/// @param exclusive Is the arena exclusive (where only heaps associated with the arena can allocate in it) +/// @param arena_id The new arena identifier. +/// @return `true` if successful. +bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id); + +/// @brief Create a new heap that only allocates in the specified arena. +/// @param arena_id The arena identifier. +/// @return The new heap or `NULL`. +mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id); + +/// @brief Create a new heap +/// @param heap_tag The heap tag associated with this heap; heaps only reclaim memory between heaps with the same tag. +/// @param allow_destroy Is \a mi_heap_destroy allowed? Not allowing this allows the heap to reclaim memory from terminated threads. +/// @param arena_id If not 0, the heap will only allocate from the specified arena. +/// @return A new heap or `NULL` on failure. +/// +/// The \a arena_id can be used by runtimes to allocate only in a specified pre-reserved arena. +/// This is used for example for a compressed pointer heap in Koka. +/// The \a heap_tag enables heaps to keep objects of a certain type isolated to heaps with that tag. +/// This is used for example in the CPython integration. +mi_heap_t* mi_heap_new_ex(int heap_tag, bool allow_destroy, mi_arena_id_t arena_id); + +/// A process can associate threads with sub-processes. +/// A sub-process will not reclaim memory from (abandoned heaps/threads) +/// other subprocesses. +typedef void* mi_subproc_id_t; + +/// @brief Get the main sub-process identifier. +mi_subproc_id_t mi_subproc_main(void); + +/// @brief Create a fresh sub-process (with no associated threads yet). +/// @return The new sub-process identifier. +mi_subproc_id_t mi_subproc_new(void); + +/// @brief Delete a previously created sub-process. 
+/// @param subproc The sub-process identifier. +/// Only delete sub-processes if all associated threads have terminated. +void mi_subproc_delete(mi_subproc_id_t subproc); + +/// Add the current thread to the given sub-process. +/// This should be called right after a thread is created (and no allocation has taken place yet) +void mi_subproc_add_current_thread(mi_subproc_id_t subproc); + + /// \} // ------------------------------------------------------ @@ -495,20 +581,24 @@ void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_m /// \defgroup aligned Aligned Allocation /// /// Allocating aligned memory blocks. +/// Note that `alignment` always follows `size` for consistency with the unaligned +/// allocation API, but unfortunately this differs from `posix_memalign` and `aligned_alloc` in the C library. /// /// \{ -/// The maximum supported alignment size (currently 1MiB). -#define MI_ALIGNMENT_MAX (1024*1024UL) - /// Allocate \a size bytes aligned by \a alignment. /// @param size number of bytes to allocate. -/// @param alignment the minimal alignment of the allocated memory. Must be less than #MI_ALIGNMENT_MAX. -/// @returns pointer to the allocated memory or \a NULL if out of memory. -/// The returned pointer is aligned by \a alignment, i.e. -/// `(uintptr_t)p % alignment == 0`. -/// +/// @param alignment the minimal alignment of the allocated memory. +/// @returns pointer to the allocated memory or \a NULL if out of memory, +/// or if the alignment is not a power of 2 (including 0). The \a size is unrestricted +/// (and does not have to be an integral multiple of the \a alignment). +/// The returned pointer is aligned by \a alignment, i.e. `(uintptr_t)p % alignment == 0`. /// Returns a unique pointer if called with \a size 0. +/// +/// Note that `alignment` always follows `size` for consistency with the unaligned +/// allocation API, but unfortunately this differs from `posix_memalign` and `aligned_alloc` in the C library. +/// +/// @see [aligned_alloc](https://en.cppreference.com/w/c/memory/aligned_alloc) (in the standard C11 library, with switched arguments!) /// @see [_aligned_malloc](https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/aligned-malloc?view=vs-2017) (on Windows) /// @see [aligned_alloc](http://man.openbsd.org/reallocarray) (on BSD, with switched arguments!) /// @see [posix_memalign](https://linux.die.net/man/3/posix_memalign) (on Posix, with switched arguments!) @@ -522,11 +612,12 @@ void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment); /// @param size number of bytes to allocate. /// @param alignment the minimal alignment of the allocated memory at \a offset. /// @param offset the offset that should be aligned. -/// @returns pointer to the allocated memory or \a NULL if out of memory. -/// The returned pointer is aligned by \a alignment at \a offset, i.e. -/// `((uintptr_t)p + offset) % alignment == 0`. -/// +/// @returns pointer to the allocated memory or \a NULL if out of memory, +/// or if the alignment is not a power of 2 (including 0). The \a size is unrestricted +/// (and does not have to be an integral multiple of the \a alignment). +/// The returned pointer is aligned by \a alignment, i.e. `(uintptr_t)p % alignment == 0`. /// Returns a unique pointer if called with \a size 0. 
+/// /// @see [_aligned_offset_malloc](https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/aligned-offset-malloc?view=vs-2017) (on Windows) void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset); void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset); @@ -558,7 +649,7 @@ mi_heap_t* mi_heap_new(); /// Delete a previously allocated heap. /// This will release resources and migrate any -/// still allocated blocks in this heap (efficienty) +/// still allocated blocks in this heap (efficiently) /// to the default heap. /// /// If \a heap is the default heap, the default @@ -574,12 +665,12 @@ void mi_heap_delete(mi_heap_t* heap); /// heap is set to the backing heap. void mi_heap_destroy(mi_heap_t* heap); -/// Set the default heap to use for mi_malloc() et al. +/// Set the default heap to use in the current thread for mi_malloc() et al. /// @param heap The new default heap. /// @returns The previous default heap. mi_heap_t* mi_heap_set_default(mi_heap_t* heap); -/// Get the default heap that is used for mi_malloc() et al. +/// Get the default heap that is used for mi_malloc() et al. (for the current thread). /// @returns The current default heap. mi_heap_t* mi_heap_get_default(); @@ -764,6 +855,8 @@ typedef struct mi_heap_area_s { size_t committed; ///< current committed bytes of this area size_t used; ///< bytes in use by allocated blocks size_t block_size; ///< size in bytes of one block + size_t full_block_size; ///< size in bytes of a full block including padding and metadata. + int heap_tag; ///< heap tag associated with this area (see \a mi_heap_new_ex) } mi_heap_area_t; /// Visitor function passed to mi_heap_visit_blocks() @@ -788,6 +881,23 @@ typedef bool (mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* a /// @returns \a true if all areas and blocks were visited. bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg); +/// @brief Visit all areas and blocks in abandoned heaps. +/// @param subproc_id The sub-process id associated with the abandoned heaps. +/// @param heap_tag Visit only abandoned memory with the specified heap tag, use -1 to visit all abandoned memory. +/// @param visit_blocks If \a true visits all allocated blocks, otherwise +/// \a visitor is only called for every heap area. +/// @param visitor This function is called for every area in the heap +/// (with \a block as \a NULL). If \a visit_all_blocks is +/// \a true, \a visitor is also called for every allocated +/// block in every area (with `block!=NULL`). +/// return \a false from this function to stop visiting early. +/// @param arg extra argument passed to the \a visitor. +/// @return \a true if all areas and blocks were visited. +/// +/// Note: requires the option `mi_option_visit_abandoned` to be set +/// at the start of the program. +bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg); + /// \} /// \defgroup options Runtime Options @@ -799,34 +909,38 @@ bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block /// Runtime options. typedef enum mi_option_e { // stable options - mi_option_show_errors, ///< Print error messages to `stderr`. - mi_option_show_stats, ///< Print statistics to `stderr` when the program is done. - mi_option_verbose, ///< Print verbose messages to `stderr`. 
- - // the following options are experimental - mi_option_eager_commit, ///< Eagerly commit segments (4MiB) (enabled by default). - mi_option_large_os_pages, ///< Use large OS pages (2MiB in size) if possible - mi_option_reserve_huge_os_pages, ///< The number of huge OS pages (1GiB in size) to reserve at the start of the program. - mi_option_reserve_huge_os_pages_at, ///< Reserve huge OS pages at node N. - mi_option_reserve_os_memory, ///< Reserve specified amount of OS memory at startup, e.g. "1g" or "512m". - mi_option_segment_cache, ///< The number of segments per thread to keep cached (0). - mi_option_page_reset, ///< Reset page memory after \a mi_option_reset_delay milliseconds when it becomes free. - mi_option_abandoned_page_reset, //< Reset free page memory when a thread terminates. - mi_option_use_numa_nodes, ///< Pretend there are at most N NUMA nodes; Use 0 to use the actual detected NUMA nodes at runtime. - mi_option_eager_commit_delay, ///< the first N segments per thread are not eagerly committed (=1). - mi_option_os_tag, ///< OS tag to assign to mimalloc'd memory - mi_option_limit_os_alloc, ///< If set to 1, do not use OS memory for allocation (but only pre-reserved arenas) - - // v1.x specific options - mi_option_eager_region_commit, ///< Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows) - mi_option_segment_reset, ///< Experimental - mi_option_reset_delay, ///< Delay in milli-seconds before resetting a page (100ms by default) - mi_option_reset_decommits, ///< Experimental - - // v2.x specific options - mi_option_allow_decommit, ///< Enable decommitting memory (=on) - mi_option_decommit_delay, ///< Decommit page memory after N milli-seconds delay (25ms). - mi_option_segment_decommit_delay, ///< Decommit large segment memory after N milli-seconds delay (500ms). + mi_option_show_errors, ///< Print error messages. + mi_option_show_stats, ///< Print statistics on termination. + mi_option_verbose, ///< Print verbose messages. + mi_option_max_errors, ///< issue at most N error messages + mi_option_max_warnings, ///< issue at most N warning messages + + // advanced options + mi_option_reserve_huge_os_pages, ///< reserve N huge OS pages (1GiB pages) at startup + mi_option_reserve_huge_os_pages_at, ///< Reserve N huge OS pages at a specific NUMA node N. + mi_option_reserve_os_memory, ///< reserve specified amount of OS memory in an arena at startup (internally, this value is in KiB; use `mi_option_get_size`) + mi_option_allow_large_os_pages, ///< allow large (2 or 4 MiB) OS pages, implies eager commit. + mi_option_purge_decommits, ///< should a memory purge decommit? (=1). Set to 0 to use memory reset on a purge (instead of decommit) + mi_option_arena_reserve, ///< initial memory size for arena reservation (= 1 GiB on 64-bit) (internally, this value is in KiB; use `mi_option_get_size`) + mi_option_os_tag, ///< tag used for OS logging (macOS only for now) (=100) + mi_option_retry_on_oom, ///< retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries. (only on windows) + + // experimental options + mi_option_eager_commit, ///< eager commit segments? (after `eager_commit_delay` segments) (enabled by default). + mi_option_eager_commit_delay, ///< the first N segments per thread are not eagerly committed (but per page in the segment on demand) + mi_option_arena_eager_commit, ///< eager commit arenas? 
Use 2 to enable just on overcommit systems (=2) + mi_option_abandoned_page_purge, ///< immediately purge delayed purges on thread termination + mi_option_purge_delay, ///< memory purging is delayed by N milli seconds; use 0 for immediate purging or -1 for no purging at all. (=10) + mi_option_use_numa_nodes, ///< 0 = use all available numa nodes, otherwise use at most N nodes. + mi_option_disallow_os_alloc, ///< 1 = do not use OS memory for allocation (but only programmatically reserved arenas) + mi_option_limit_os_alloc, ///< If set to 1, do not use OS memory for allocation (but only pre-reserved arenas) + mi_option_max_segment_reclaim, ///< max. percentage of the abandoned segments can be reclaimed per try (=10%) + mi_option_destroy_on_exit, ///< if set, release all memory on exit; sometimes used for dynamic unloading but can be unsafe + mi_option_arena_purge_mult, ///< multiplier for `purge_delay` for the purging delay for arenas (=10) + mi_option_abandoned_reclaim_on_free, ///< allow to reclaim an abandoned segment on a free (=1) + mi_option_purge_extend_delay, ///< extend purge delay on each subsequent delay (=1) + mi_option_disallow_arena_alloc, ///< 1 = do not use arena's for allocation (except if using specific arena id's) + mi_option_visit_abandoned, ///< allow visiting heap blocks from abandoned threads (=0) _mi_option_last } mi_option_t; @@ -838,7 +952,10 @@ void mi_option_disable(mi_option_t option); void mi_option_set_enabled(mi_option_t option, bool enable); void mi_option_set_enabled_default(mi_option_t option, bool enable); -long mi_option_get(mi_option_t option); +long mi_option_get(mi_option_t option); +long mi_option_get_clamp(mi_option_t option, long min, long max); +size_t mi_option_get_size(mi_option_t option); + void mi_option_set(mi_option_t option, long value); void mi_option_set_default(mi_option_t option, long value); @@ -852,21 +969,27 @@ void mi_option_set_default(mi_option_t option, long value); /// /// \{ +/// Just as `free` but also checks if the pointer `p` belongs to our heap. +void mi_cfree(void* p); +void* mi__expand(void* p, size_t newsize); + void* mi_recalloc(void* p, size_t count, size_t size); size_t mi_malloc_size(const void* p); +size_t mi_malloc_good_size(size_t size); size_t mi_malloc_usable_size(const void *p); -/// Just as `free` but also checks if the pointer `p` belongs to our heap. -void mi_cfree(void* p); - int mi_posix_memalign(void** p, size_t alignment, size_t size); int mi__posix_memalign(void** p, size_t alignment, size_t size); void* mi_memalign(size_t alignment, size_t size); void* mi_valloc(size_t size); - void* mi_pvalloc(size_t size); void* mi_aligned_alloc(size_t alignment, size_t size); +unsigned short* mi_wcsdup(const unsigned short* s); +unsigned char* mi_mbsdup(const unsigned char* s); +int mi_dupenv_s(char** buf, size_t* size, const char* name); +int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name); + /// Correspond s to [reallocarray](https://www.freebsd.org/cgi/man.cgi?query=reallocarray&sektion=3&manpath=freebsd-release-ports) /// in FreeBSD. void* mi_reallocarray(void* p, size_t count, size_t size); @@ -874,6 +997,9 @@ void* mi_reallocarray(void* p, size_t count, size_t size); /// Corresponds to [reallocarr](https://man.netbsd.org/reallocarr.3) in NetBSD. 
int mi_reallocarr(void* p, size_t count, size_t size); +void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment); +void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset); + void mi_free_size(void* p, size_t size); void mi_free_size_aligned(void* p, size_t size, size_t alignment); void mi_free_aligned(void* p, size_t alignment); @@ -888,7 +1014,7 @@ void mi_free_aligned(void* p, size_t alignment); /// /// Note: use the `mimalloc-new-delete.h` header to override the \a new /// and \a delete operators globally. The wrappers here are mostly -/// for convience for library writers that need to interface with +/// for convenience for library writers that need to interface with /// mimalloc from C++. /// /// \{ @@ -927,7 +1053,7 @@ template struct mi_stl_allocator { } /*! \page build Building -Checkout the sources from Github: +Checkout the sources from GitHub: ``` git clone https://github.com/microsoft/mimalloc ``` @@ -998,7 +1124,7 @@ mimalloc uses only safe OS calls (`mmap` and `VirtualAlloc`) and can co-exist with other allocators linked to the same program. If you use `cmake`, you can simply use: ``` -find_package(mimalloc 1.0 REQUIRED) +find_package(mimalloc 2.1 REQUIRED) ``` in your `CMakeLists.txt` to find a locally installed mimalloc. Then use either: ``` @@ -1012,7 +1138,7 @@ to link with the static library. See `test\CMakeLists.txt` for an example. ### C++ For best performance in C++ programs, it is also recommended to override the -global `new` and `delete` operators. For convience, mimalloc provides +global `new` and `delete` operators. For convenience, mimalloc provides [`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project. In C++, mimalloc also provides the `mi_stl_allocator` struct which implements the `std::allocator` @@ -1071,38 +1197,62 @@ See \ref overrides for more info. /*! \page environment Environment Options -You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)), -or via environment variables. +You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)), or via environment variables: - `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates. - `MIMALLOC_VERBOSE=1`: show verbose messages. - `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages. -- `MIMALLOC_PAGE_RESET=0`: by default, mimalloc will reset (or purge) OS pages when not in use to signal to the OS - that the underlying physical memory can be reused. This can reduce memory fragmentation in long running (server) - programs. By setting it to `0` no such page resets will be done which can improve performance for programs that are not long - running. As an alternative, the `MIMALLOC_DECOMMIT_DELAY=` can be set higher (100ms by default) to make the page - reset occur less frequently instead of turning it off completely. -- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly - improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs - to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). 
However, sometimes + +Advanced options: + +- `MIMALLOC_ARENA_EAGER_COMMIT=2`: turns on eager commit for the large arenas (usually 1GiB) from which mimalloc + allocates segments and pages. Set this to 2 (default) to + only enable this on overcommit systems (e.g. Linux). Set this to 1 to enable explicitly on other systems + as well (like Windows or macOS) which may improve performance (as the whole arena is committed at once). + Note that eager commit only increases the commit but not the actual the peak resident set + (rss) so it is generally ok to enable this. +- `MIMALLOC_PURGE_DELAY=N`: the delay in `N` milli-seconds (by default `10`) after which mimalloc will purge + OS pages that are not in use. This signals to the OS that the underlying physical memory can be reused which + can reduce memory fragmentation especially in long running (server) programs. Setting `N` to `0` purges immediately when + a page becomes unused which can improve memory usage but also decreases performance. Setting `N` to a higher + value like `100` can improve performance (sometimes by a lot) at the cost of potentially using more memory at times. + Setting it to `-1` disables purging completely. +- `MIMALLOC_PURGE_DECOMMITS=1`: By default "purging" memory means unused memory is decommitted (`MEM_DECOMMIT` on Windows, + `MADV_DONTNEED` (which decresease rss immediately) on `mmap` systems). Set this to 0 to instead "reset" unused + memory on a purge (`MEM_RESET` on Windows, generally `MADV_FREE` (which does not decrease rss immediately) on `mmap` systems). + Mimalloc generally does not "free" OS memory but only "purges" OS memory, in other words, it tries to keep virtual + address ranges and decommits within those ranges (to make the underlying physical memory available to other processes). + +Further options for large workloads and services: + +- `MIMALLOC_ALLOW_THP=1`: By default always allow transparent huge pages (THP) on Linux systems. On Android only this is + by default off. When set to `0`, THP is disabled for the process that mimalloc runs in. If enabled, mimalloc also sets + the `MIMALLOC_MINIMAL_PURGE_SIZE` in v3 to 2MiB to avoid potentially breaking up transparent huge pages. +- `MIMALLOC_USE_NUMA_NODES=N`: pretend there are at most `N` NUMA nodes. If not set, the actual NUMA nodes are detected + at runtime. Setting `N` to 1 may avoid problems in some virtual environments. Also, setting it to a lower number than + the actual NUMA nodes is fine and will only cause threads to potentially allocate more memory across actual NUMA + nodes (but this can happen in any case as NUMA local allocation is always a best effort but not guaranteed). +- `MIMALLOC_ALLOW_LARGE_OS_PAGES=0`: Set to 1 to use large OS pages (2 or 4MiB) when available; for some workloads this can + significantly improve performance. However, large OS pages cannot be purged or shared with other processes so may lead + to increased memory usage in some cases. + Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs + to explicitly give permissions for large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that - can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead when possible). -- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB _huge_ OS pages. 
This reserves the huge pages at + can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible). +- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where `N` is the number of 1GiB _huge_ OS pages. This reserves the huge pages at startup and sometimes this can give a large (latency) performance improvement on big workloads. - Usually it is better to not use - `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving + Usually it is better to not use `MIMALLOC_ALLOW_LARGE_OS_PAGES=1` in combination with this setting. Just like large + OS pages, use with care as reserving contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at startup only once). - Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])). With huge OS pages, it may be beneficial to set the setting - `MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB) - of a thread to not allocate in the huge OS pages; this prevents threads that are short lived - and allocate just a little to take up space in the huge OS page area (which cannot be reset). -- `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N`: where N is the numa node. This reserves the huge pages at a specific numa node. - (`N` is -1 by default to reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected)) + Note that we usually need to explicitly give permission for huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])). + The huge pages are usually allocated evenly among NUMA nodes. + We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all + the huge pages at a specific numa node instead. Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write for all pages in the original process including the huge OS pages. When any memory is now written in that area, the -OS will copy the entire 1GiB huge page (or 2MiB large page) which can cause the memory usage to grow in big increments. +OS will copy the entire 1GiB huge page (or 2MiB large page) which can cause the memory usage to grow in large increments. [linux-huge]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5 [windows-huge]: https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows?view=sql-server-2017 @@ -1111,88 +1261,106 @@ OS will copy the entire 1GiB huge page (or 2MiB large page) which can cause the /*! \page overrides Overriding Malloc -Overriding the standard `malloc` can be done either _dynamically_ or _statically_. +Overriding the standard `malloc` (and `new`) can be done either _dynamically_ or _statically_. ## Dynamic override This is the recommended way to override the standard malloc interface. +### Dynamic Override on Linux, BSD -### Linux, BSD - -On these systems we preload the mimalloc shared +On these ELF-based systems we preload the mimalloc shared library so all calls to the standard `malloc` interface are resolved to the _mimalloc_ library. 
- -- `env LD_PRELOAD=/usr/lib/libmimalloc.so myprogram` +``` +> env LD_PRELOAD=/usr/lib/libmimalloc.so myprogram +``` You can set extra environment variables to check that mimalloc is running, like: ``` -env MIMALLOC_VERBOSE=1 LD_PRELOAD=/usr/lib/libmimalloc.so myprogram +> env MIMALLOC_VERBOSE=1 LD_PRELOAD=/usr/lib/libmimalloc.so myprogram ``` or run with the debug version to get detailed statistics: ``` -env MIMALLOC_SHOW_STATS=1 LD_PRELOAD=/usr/lib/libmimalloc-debug.so myprogram +> env MIMALLOC_SHOW_STATS=1 LD_PRELOAD=/usr/lib/libmimalloc-debug.so myprogram ``` -### MacOS +### Dynamic Override on MacOS On macOS we can also preload the mimalloc shared library so all calls to the standard `malloc` interface are resolved to the _mimalloc_ library. - -- `env DYLD_FORCE_FLAT_NAMESPACE=1 DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram` +``` +> env DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram +``` Note that certain security restrictions may apply when doing this from the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash). -(Note: macOS support for dynamic overriding is recent, please report any issues.) +### Dynamic Override on Windows + +Dynamically overriding on mimalloc on Windows +is robust and has the particular advantage to be able to redirect all malloc/free calls +that go through the (dynamic) C runtime allocator, including those from other DLL's or +libraries. As it intercepts all allocation calls on a low level, it can be used reliably +on large programs that include other 3rd party components. +There are four requirements to make the overriding work well: + +1. Use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). -### Windows +2. Link your program explicitly with the `mimalloc.lib` export library for the `mimalloc.dll`. + (which must be compiled with `-DMI_OVERRIDE=ON`, which is the default though). + To ensure the `mimalloc.dll` is actually loaded at run-time it is easiest + to insert some call to the mimalloc API in the `main` function, like `mi_version()` + (or use the `/include:mi_version` switch on the linker command, or + similarly, `#pragma comment(linker, "/include:mi_version")` in some source file). + See the `mimalloc-test-override` project for an example on how to use this. -Overriding on Windows is robust and has the -particular advantage to be able to redirect all malloc/free calls that go through -the (dynamic) C runtime allocator, including those from other DLL's or libraries. +3. The `mimalloc-redirect.dll` must be put in the same directory as the main + `mimalloc.dll` at runtime (as it is a dependency of that DLL). + The redirection DLL ensures that all calls to the C runtime malloc API get + redirected to mimalloc functions (which reside in `mimalloc.dll`). -The overriding on Windows requires that you link your program explicitly with -the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). -Also, the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) must be available -in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency). -The redirection DLL ensures that all calls to the C runtime malloc API get redirected to -mimalloc (in `mimalloc-override.dll`). +4. Ensure the `mimalloc.dll` comes as early as possible in the import + list of the final executable (so it can intercept all potential allocations). + You can use `minject -l ` to check this if needed. 
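
For illustration (not part of the upstream patch): a minimal sketch of point 2 above, showing how a call to `mi_version()` in `main` keeps the reference to `mimalloc.dll` alive so the redirection can take effect. The file name `main.c` is hypothetical; it assumes the executable links against the `mimalloc.lib` export library and uses the dynamic C runtime (`/MD` or `/MDd`).

```
// hypothetical main.c -- assumes the executable links mimalloc.lib (the export
// library for mimalloc.dll built with -DMI_OVERRIDE=ON) and uses /MD or /MDd.
#include <stdio.h>
#include <mimalloc.h>

// Alternative to the explicit call below: force the linker to keep the symbol.
// #pragma comment(linker, "/include:mi_version")

int main(void) {
  // Touching the mimalloc API ensures mimalloc.dll is actually loaded at run-time.
  printf("mimalloc version: %d\n", mi_version());
  printf("malloc redirected: %s\n", mi_is_redirected() ? "yes" : "no");
  return 0;
}
```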
-To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some -call to the mimalloc API in the `main` function, like `mi_version()` -(or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project -for an example on how to use this. For best performance on Windows with C++, it +For best performance on Windows with C++, it is also recommended to also override the `new`/`delete` operations (by including -[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) a single(!) source file in your project). +[`mimalloc-new-delete.h`](include/mimalloc-new-delete.h) +a single(!) source file in your project). The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic -overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected. +overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully +redirected. -(Note: in principle, it is possible to even patch existing executables without any recompilation -if they are linked with the dynamic C runtime (`ucrtbase.dll`) -- just put the `mimalloc-override.dll` -into the import table (and put `mimalloc-redirect.dll` in the same folder) -Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388)). +For different platforms than x64, you may need a specific [redirection dll](bin). +Furthermore, we cannot always re-link an executable or ensure `mimalloc.dll` comes +first in the import table. In such cases the [`minject`](bin) tool can be used +to patch the executable's import tables. ## Static override -On Unix systems, you can also statically link with _mimalloc_ to override the standard +On Unix-like systems, you can also statically link with _mimalloc_ to override the standard malloc interface. The recommended way is to link the final program with the -_mimalloc_ single object file (`mimalloc-override.o`). We use +_mimalloc_ single object file (`mimalloc.o`). We use an object file instead of a library file as linkers give preference to that over archives to resolve symbols. To ensure that the standard malloc interface resolves to the _mimalloc_ library, link it as the first object file. For example: - ``` -gcc -o myprogram mimalloc-override.o myfile1.c ... +> gcc -o myprogram mimalloc.o myfile1.c ... ``` +Another way to override statically that works on all platforms, is to +link statically to mimalloc (as shown in the introduction) and include a +header file in each source file that re-defines `malloc` etc. to `mi_malloc`. +This is provided by [`mimalloc-override.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-override.h). This only works reliably though if all sources are +under your control or otherwise mixing of pointers from different heaps may occur! 
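
For illustration (not part of the upstream patch): a minimal sketch of the header-based static override mentioned above, where `mimalloc-override.h` re-defines `malloc`, `free`, etc. to their `mi_` counterparts. The file name `main.c` is hypothetical and the mimalloc include directory is assumed to be on the include path; the header is included last so the macro redefinitions do not interfere with the standard library declarations.

```
// hypothetical main.c -- statically linked with the mimalloc library
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mimalloc-override.h>  // re-defines malloc/free/... as mi_malloc/mi_free/...

int main(void) {
  char* s = (char*)malloc(32);  // expands to mi_malloc(32)
  if (s != NULL) {
    strcpy(s, "hello mimalloc");
    puts(s);
    free(s);                    // expands to mi_free(s)
  }
  return 0;
}
```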
+ ## List of Overrides: The specific functions that get redirected to the _mimalloc_ library are: diff --git a/3rd/mimalloc-2.0.9/doc/mimalloc-doxygen.css b/3rd/mimalloc-3.2.6/doc/mimalloc-doxygen.css similarity index 83% rename from 3rd/mimalloc-2.0.9/doc/mimalloc-doxygen.css rename to 3rd/mimalloc-3.2.6/doc/mimalloc-doxygen.css index b24f5643..c889a8d2 100644 --- a/3rd/mimalloc-2.0.9/doc/mimalloc-doxygen.css +++ b/3rd/mimalloc-3.2.6/doc/mimalloc-doxygen.css @@ -47,3 +47,14 @@ div.fragment { #nav-sync img { display: none; } +h1,h2,h3,h4,h5,h6 { + transition:none; +} +.memtitle { + background-image: none; + background-color: #EEE; +} +table.memproto, .memproto { + text-shadow: none; + font-size: 110%; +} diff --git a/3rd/mimalloc-2.0.9/doc/mimalloc-logo-100.png b/3rd/mimalloc-3.2.6/doc/mimalloc-logo-100.png similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/mimalloc-logo-100.png rename to 3rd/mimalloc-3.2.6/doc/mimalloc-logo-100.png diff --git a/3rd/mimalloc-2.0.9/doc/mimalloc-logo.png b/3rd/mimalloc-3.2.6/doc/mimalloc-logo.png similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/mimalloc-logo.png rename to 3rd/mimalloc-3.2.6/doc/mimalloc-logo.png diff --git a/3rd/mimalloc-2.0.9/doc/mimalloc-logo.svg b/3rd/mimalloc-3.2.6/doc/mimalloc-logo.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/mimalloc-logo.svg rename to 3rd/mimalloc-3.2.6/doc/mimalloc-logo.svg diff --git a/3rd/mimalloc-2.0.9/doc/spades-logo.png b/3rd/mimalloc-3.2.6/doc/spades-logo.png similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/spades-logo.png rename to 3rd/mimalloc-3.2.6/doc/spades-logo.png diff --git a/3rd/mimalloc-2.0.9/doc/unreal-logo.svg b/3rd/mimalloc-3.2.6/doc/unreal-logo.svg similarity index 100% rename from 3rd/mimalloc-2.0.9/doc/unreal-logo.svg rename to 3rd/mimalloc-3.2.6/doc/unreal-logo.svg diff --git a/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-lib.vcxproj b/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-lib.vcxproj new file mode 100644 index 00000000..3c7e246a --- /dev/null +++ b/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-lib.vcxproj @@ -0,0 +1,502 @@ + + + + + Debug + ARM64 + + + Debug + ARM64EC + + + Debug + Win32 + + + Release + ARM64 + + + Release + ARM64EC + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {ABB5EAE7-B3E6-432E-B636-333449892EA6} + mimalloc-lib + 10.0 + mimalloc-lib + + + + StaticLibrary + true + v143 + + + StaticLibrary + false + v143 + true + + + StaticLibrary + true + v143 + + + StaticLibrary + true + v143 + + + StaticLibrary + true + v143 + + + StaticLibrary + false + v143 + true + + + StaticLibrary + false + v143 + true + + + StaticLibrary + false + v143 + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + .lib + mimalloc + + + $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + .lib + mimalloc + + + $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + .lib + mimalloc + + + $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + .lib + mimalloc + + + $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + .lib + mimalloc + + + 
$(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + .lib + mimalloc + + + $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + .lib + mimalloc + + + $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + .lib + mimalloc + + + false + + + + Level4 + Disabled + true + Default + ../../include + MI_DEBUG=3;%(PreprocessorDefinitions); + CompileAsCpp + false + stdcpp20 + /Zc:__cplusplus %(AdditionalOptions) + + + + + + + + + + + Level4 + Disabled + true + Default + ../../include + MI_DEBUG=3;MI_GUARDED=0;%(PreprocessorDefinitions); + CompileAsCpp + false + stdcpp20 + /Zc:__cplusplus %(AdditionalOptions) + + + + + + + + + + + + + + + + + + + Level4 + Disabled + true + Default + ../../include + MI_DEBUG=3;MI_GUARDED=0;%(PreprocessorDefinitions); + CompileAsCpp + false + stdcpp20 + /Zc:__cplusplus %(AdditionalOptions) + + + + + + + + + + + + + + + + + + + Level4 + Disabled + true + Default + ../../include + MI_DEBUG=3;MI_GUARDED=0;%(PreprocessorDefinitions); + CompileAsCpp + false + stdcpp20 + /Zc:__cplusplus %(AdditionalOptions) + + + + + + + + + + + + + + + + + + + Level4 + MaxSpeed + true + Default + ../../include + %(PreprocessorDefinitions);NDEBUG + AssemblyAndSourceCode + $(IntDir) + false + false + Default + CompileAsCpp + true + stdcpp20 + /Zc:__cplusplus %(AdditionalOptions) + + + true + true + + + + + + + + + + + Level4 + MaxSpeed + true + Default + ../../include + %(PreprocessorDefinitions);NDEBUG + AssemblyAndSourceCode + $(IntDir) + false + false + Default + CompileAsCpp + true + stdcpp20 + AdvancedVectorExtensions2 + /Zc:__cplusplus %(AdditionalOptions) + + + true + true + + + + + + + + + + + + + + + + + Level4 + MaxSpeed + true + Default + ../../include + %(PreprocessorDefinitions);NDEBUG + AssemblyAndSourceCode + $(IntDir) + false + false + Default + CompileAsCpp + true + stdcpp20 + CPUExtensionRequirementsARMv81 + Sync + /Zc:__cplusplus %(AdditionalOptions) + + + true + true + + + + + + + + + + + + + + + + + Level4 + MaxSpeed + true + Default + ../../include + %(PreprocessorDefinitions);NDEBUG + AssemblyAndSourceCode + $(IntDir) + false + false + Default + CompileAsCpp + true + stdcpp20 + CPUExtensionRequirementsARMv81 + Sync + /Zc:__cplusplus %(AdditionalOptions) + + + true + true + + + + + + + + + + + + + + + + + false + false + false + false + false + false + false + false + + + true + true + true + true + true + true + true + true + + + + + + + false + false + false + + + true + true + true + true + true + true + true + true + + + + + + + + + true + true + true + true + true + true + true + true + + + + true + true + true + true + true + true + true + true + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-lib.vcxproj.filters b/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-lib.vcxproj.filters new file mode 100644 index 00000000..4915582d --- /dev/null +++ b/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-lib.vcxproj.filters @@ -0,0 +1,114 @@ + + + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + + + Headers + + + 
Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + + + {1430490c-e711-4ace-a1b8-36f4d5105873} + + + {461c78ef-04b0-44d1-a0ca-7d488abaa592} + + + \ No newline at end of file diff --git a/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-override-dll.vcxproj b/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-override-dll.vcxproj new file mode 100644 index 00000000..ab80dd65 --- /dev/null +++ b/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-override-dll.vcxproj @@ -0,0 +1,517 @@ + + + + + Debug + ARM64 + + + Debug + ARM64EC + + + Debug + Win32 + + + Release + ARM64 + + + Release + ARM64EC + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {ABB5EAE7-B3E6-432E-B636-333449892EA7} + mimalloc-override-dll + 10.0 + mimalloc-override-dll + + + + DynamicLibrary + true + v143 + + + DynamicLibrary + false + v143 + + + DynamicLibrary + true + v143 + + + DynamicLibrary + true + v143 + + + DynamicLibrary + true + v143 + + + DynamicLibrary + false + v143 + + + DynamicLibrary + false + v143 + + + DynamicLibrary + false + v143 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + .dll + mimalloc + + + $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + .dll + mimalloc + + + $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + .dll + mimalloc + + + $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + .dll + mimalloc + + + $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + .dll + mimalloc + + + $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + .dll + mimalloc + + + $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + .dll + mimalloc + + + $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + .dll + mimalloc + + + false + + + + Level3 + Disabled + true + true + ../../include + MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); + MultiThreadedDebugDLL + false + CompileAsCpp + /Zc:__cplusplus %(AdditionalOptions) + + + $(ProjectDir)\..\..\bin\mimalloc-redirect32.lib;%(AdditionalDependencies) + + + + + Default + false + $(OutDir)$(TargetName).dll.lib + $(OutDir)$(TargetName).dll.pdb + + + COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect32.dll" "$(OutputPath)" + + + Copy mimalloc-redirect32.dll to the output directory + + + + + Level3 + Disabled + true + true + ../../include + MI_DEBUG=4;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); + MultiThreadedDebugDLL + false + CompileAsCpp + /Zc:__cplusplus %(AdditionalOptions) + + + $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies) + + + + + Default + false + $(OutDir)$(TargetName).dll.lib + $(OutDir)$(TargetName).dll.pdb + + + COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect.dll" "$(OutputPath)" + + + copy mimalloc-redirect.dll to the output directory + + + + + Level3 + 
Disabled + true + true + ../../include + MI_DEBUG=4;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); + MultiThreadedDebugDLL + false + CompileAsCpp + /Zc:__cplusplus %(AdditionalOptions) + + + $(ProjectDir)\..\..\bin\mimalloc-redirect-arm64.lib;%(AdditionalDependencies) + + + + + Default + false + $(OutDir)$(TargetName).dll.lib + $(OutDir)$(TargetName).dll.pdb + + + COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect-arm64.dll" "$(OutputPath)" + + + copy mimalloc-redirect-arm64.dll to the output directory + + + + + Level3 + Disabled + true + true + ../../include + MI_DEBUG=4;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); + MultiThreadedDebugDLL + false + CompileAsCpp + /Zc:__cplusplus %(AdditionalOptions) + + + $(ProjectDir)\..\..\bin\mimalloc-redirect-arm64ec.lib;%(AdditionalDependencies) + + + + + Default + false + $(OutDir)$(TargetName).dll.lib + $(OutDir)$(TargetName).dll.pdb + + + COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect-arm64ec.dll" "$(OutputPath)" + + + copy mimalloc-redirect-arm64ec.dll to the output directory + + + + + Level3 + MaxSpeed + true + true + true + ../../include + MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);NDEBUG + AssemblyAndSourceCode + $(IntDir) + false + MultiThreadedDLL + CompileAsCpp + false + /Zc:__cplusplus %(AdditionalOptions) + + + true + true + $(ProjectDir)\..\..\bin\mimalloc-redirect32.lib;%(AdditionalDependencies) + + + Default + false + $(OutDir)$(TargetName).dll.lib + $(OutDir)$(TargetName).dll.pdb + + + COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect32.dll" "$(OutputPath)" + + + Copy mimalloc-redirect32.dll to the output directory + + + + + Level3 + MaxSpeed + true + true + true + ../../include + MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);NDEBUG + AssemblyAndSourceCode + $(IntDir) + false + MultiThreadedDLL + CompileAsCpp + false + /Zc:__cplusplus %(AdditionalOptions) + + + true + true + $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies) + + + Default + false + $(OutDir)$(TargetName).dll.lib + $(OutDir)$(TargetName).dll.pdb + + + COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect.dll" "$(OutputPath)" + + + copy mimalloc-redirect.dll to the output directory + + + + + Level3 + MaxSpeed + true + true + true + ../../include + MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);NDEBUG + AssemblyAndSourceCode + $(IntDir) + false + MultiThreadedDLL + CompileAsCpp + false + CPUExtensionRequirementsARMv81 + /Zc:__cplusplus %(AdditionalOptions) + + + true + true + $(ProjectDir)\..\..\bin\mimalloc-redirect-arm64.lib;%(AdditionalDependencies) + + + Default + false + $(OutDir)$(TargetName).dll.lib + $(OutDir)$(TargetName).dll.pdb + + + COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect-arm64.dll" "$(OutputPath)" + + + copy mimalloc-redirect-arm64.dll to the output directory + + + + + Level3 + MaxSpeed + true + true + true + ../../include + MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);NDEBUG + AssemblyAndSourceCode + $(IntDir) + false + MultiThreadedDLL + CompileAsCpp + false + CPUExtensionRequirementsARMv81 + /Zc:__cplusplus %(AdditionalOptions) + + + true + true + $(ProjectDir)\..\..\bin\mimalloc-redirect-arm64ec.lib;%(AdditionalDependencies) + + + Default + false + $(OutDir)$(TargetName).dll.lib + $(OutDir)$(TargetName).dll.pdb + + + COPY /Y "$(ProjectDir)..\..\bin\mimalloc-redirect-arm64ec.dll" "$(OutputPath)" + + + copy 
mimalloc-redirect-arm64ec.dll to the output directory + + + + + + + + + + + + + + + + + + false + false + false + false + false + false + false + false + + + true + true + true + true + true + true + true + true + + + + + + + + true + true + true + true + true + true + true + true + + + + + + + + + true + true + true + true + true + true + true + true + + + + + true + true + true + true + true + true + true + true + + + + + + + + + + \ No newline at end of file diff --git a/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-override-dll.vcxproj.filters b/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-override-dll.vcxproj.filters new file mode 100644 index 00000000..d8973af8 --- /dev/null +++ b/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-override-dll.vcxproj.filters @@ -0,0 +1,114 @@ + + + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + Sources + + + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + Headers + + + + + {262c6c21-e270-4ba6-bd63-4ac999307e4e} + + + {94b40bdc-a741-45dd-81aa-c05fabcd2970} + + + \ No newline at end of file diff --git a/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-override-test-dep.vcxproj b/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-override-test-dep.vcxproj new file mode 100644 index 00000000..c1b89690 --- /dev/null +++ b/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-override-test-dep.vcxproj @@ -0,0 +1,355 @@ + + + + + Debug + ARM64 + + + Debug + ARM64EC + + + Debug + Win32 + + + Release + ARM64 + + + Release + ARM64EC + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {FEF7869F-750E-4C21-A04D-22707CC66879} + mimalloc-test-override-dep + 10.0 + mimalloc-test-override-dep + + + + DynamicLibrary + true + v143 + + + DynamicLibrary + false + v143 + true + + + DynamicLibrary + true + v143 + + + DynamicLibrary + true + v143 + + + DynamicLibrary + true + v143 + + + DynamicLibrary + false + v143 + true + + + DynamicLibrary + false + v143 + true + + + DynamicLibrary + false + v143 + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + false + + + + Level3 + Disabled + true + true + ..\..\include + MultiThreadedDebugDLL + Sync + Default + false 
+ + + Console + kernel32.lib;%(AdditionalDependencies) + + + + + + + + + + Level3 + Disabled + true + true + ..\..\include + MultiThreadedDebugDLL + Sync + Default + false + + + Console + + + kernel32.lib;%(AdditionalDependencies) + + + + + + + + + + Level3 + Disabled + true + true + ..\..\include + MultiThreadedDebugDLL + Sync + Default + false + + + Console + + + kernel32.lib;%(AdditionalDependencies) + + + + + + + + + + Level3 + Disabled + true + true + ..\..\include + MultiThreadedDebugDLL + Sync + Default + false + + + Console + + + kernel32.lib;%(AdditionalDependencies) + + + + + + + + + + Level3 + MaxSpeed + true + true + true + true + ..\..\include + _MBCS;%(PreprocessorDefinitions);NDEBUG + MultiThreadedDLL + + + true + true + Console + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + Level3 + MaxSpeed + true + true + true + true + ..\..\include + _MBCS;%(PreprocessorDefinitions);NDEBUG + MultiThreadedDLL + + + true + true + Console + + + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + Level3 + MaxSpeed + true + true + true + true + ..\..\include + _MBCS;%(PreprocessorDefinitions);NDEBUG + MultiThreadedDLL + + + true + true + Console + + + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + Level3 + MaxSpeed + true + true + true + true + ..\..\include + _MBCS;%(PreprocessorDefinitions);NDEBUG + MultiThreadedDLL + + + true + true + Console + + + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/3rd/mimalloc-2.0.9/ide/vs2022/mimalloc-override-test.vcxproj b/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-override-test.vcxproj similarity index 51% rename from 3rd/mimalloc-2.0.9/ide/vs2022/mimalloc-override-test.vcxproj rename to 3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-override-test.vcxproj index a3c56f7b..1c7a50c5 100644 --- a/3rd/mimalloc-2.0.9/ide/vs2022/mimalloc-override-test.vcxproj +++ b/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-override-test.vcxproj @@ -1,10 +1,26 @@ + + Debug + ARM64 + + + Debug + ARM64EC + Debug Win32 + + Release + ARM64 + + + Release + ARM64EC + Release Win32 @@ -23,7 +39,7 @@ {FEF7868F-750E-4C21-A04D-22707CC66879} mimalloc-override-test 10.0 - mimalloc-override-test + mimalloc-test-override @@ -42,12 +58,34 @@ true v143 + + Application + true + v143 + + + Application + true + v143 + Application false v143 true + + Application + false + v143 + true + + + Application + false + v143 + true + @@ -62,9 +100,21 @@ + + + + + + + + + + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ @@ -78,10 +128,29 @@ $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ 
$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + false + Level3 @@ -128,6 +197,54 @@ + + + Level3 + Disabled + true + true + ..\..\include + MultiThreadedDebugDLL + Sync + Default + false + + + Console + + + kernel32.lib;%(AdditionalDependencies) + + + + + + + + + + Level3 + Disabled + true + true + ..\..\include + MultiThreadedDebugDLL + Sync + Default + false + + + Console + + + kernel32.lib;%(AdditionalDependencies) + + + + + + + Level3 @@ -139,6 +256,7 @@ ..\..\include _MBCS;%(PreprocessorDefinitions);NDEBUG MultiThreadedDLL + Default true @@ -162,6 +280,59 @@ ..\..\include _MBCS;%(PreprocessorDefinitions);NDEBUG MultiThreadedDLL + Default + + + true + true + Console + + + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + Level3 + MaxSpeed + true + true + true + true + ..\..\include + _MBCS;%(PreprocessorDefinitions);NDEBUG + MultiThreadedDLL + Default + + + true + true + Console + + + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + Level3 + MaxSpeed + true + true + true + true + ..\..\include + _MBCS;%(PreprocessorDefinitions);NDEBUG + MultiThreadedDLL + Default true @@ -180,9 +351,12 @@ - + {abb5eae7-b3e6-432e-b636-333449892ea7} + + {fef7869f-750e-4c21-a04d-22707cc66879} + diff --git a/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-test-api.vcxproj b/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-test-api.vcxproj new file mode 100644 index 00000000..440693a2 --- /dev/null +++ b/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-test-api.vcxproj @@ -0,0 +1,295 @@ + + + + + Debug + ARM64 + + + Debug + ARM64EC + + + Debug + Win32 + + + Release + ARM64 + + + Release + ARM64EC + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {FFF7958F-750E-4C21-A04D-22707CC66878} + mimalloc-test-api + 10.0 + mimalloc-test-api + + + + Application + true + v143 + + + Application + false + v143 + true + + + Application + true + v143 + + + Application + true + v143 + + + Application + true + v143 + + + Application + false + v143 + true + + + Application + false + v143 + true + + + Application + false + v143 + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + 
$(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + false + + + + Level3 + Disabled + true + true + ..\..\include + + + Console + + + + + Level3 + Disabled + true + true + ..\..\include + + + Console + + + + + Level3 + Disabled + true + true + ..\..\include + + + Console + + + + + Level3 + Disabled + true + true + ..\..\include + + + Console + + + + + Level3 + MaxSpeed + true + true + true + true + ..\..\include + %(PreprocessorDefinitions);NDEBUG + + + true + true + Console + + + + + Level3 + MaxSpeed + true + true + true + true + ..\..\include + %(PreprocessorDefinitions);NDEBUG + + + true + true + Console + + + + + Level3 + MaxSpeed + true + true + true + true + ..\..\include + %(PreprocessorDefinitions);NDEBUG + + + true + true + Console + + + + + Level3 + MaxSpeed + true + true + true + true + ..\..\include + %(PreprocessorDefinitions);NDEBUG + + + true + true + Console + + + + + true + true + true + true + true + true + true + true + + + false + false + false + + + + + {abb5eae7-b3e6-432e-b636-333449892ea6} + + + + + + \ No newline at end of file diff --git a/3rd/mimalloc-2.0.9/ide/vs2022/mimalloc-test-stress.vcxproj b/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-test-stress.vcxproj similarity index 52% rename from 3rd/mimalloc-2.0.9/ide/vs2022/mimalloc-test-stress.vcxproj rename to 3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-test-stress.vcxproj index c7e820df..d6af71ce 100644 --- a/3rd/mimalloc-2.0.9/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-test-stress.vcxproj @@ -1,10 +1,26 @@ + + Debug + ARM64 + + + Debug + ARM64EC + Debug Win32 + + Release + ARM64 + + + Release + ARM64EC + Release Win32 @@ -42,12 +58,34 @@ true v143 + + Application + true + v143 + + + Application + true + v143 + Application false v143 true + + Application + false + v143 + true + + + Application + false + v143 + true + @@ -62,9 +100,21 @@ + + + + + + + + + + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ @@ -78,10 +128,29 @@ $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + false + Level3 @@ -106,6 +175,30 @@ Console + + + Level3 + Disabled + true + true + ..\..\include + + + Console + + + + + Level3 + Disabled + true + true + ..\..\include + + + Console + + Level3 @@ -140,16 +233,56 @@ Console + + + Level3 + MaxSpeed + true + true + true + true + ..\..\include + %(PreprocessorDefinitions);NDEBUG + CPUExtensionRequirementsARMv81 + + + true + true + Console + + + + + Level3 + MaxSpeed + true + true + true + true + ..\..\include + %(PreprocessorDefinitions);NDEBUG + 
CPUExtensionRequirementsARMv81 + + + true + true + Console + + false false + false + false false false + false + false - + {abb5eae7-b3e6-432e-b636-333449892ea6} diff --git a/3rd/mimalloc-2.0.9/ide/vs2022/mimalloc-test.vcxproj b/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-test.vcxproj similarity index 52% rename from 3rd/mimalloc-2.0.9/ide/vs2022/mimalloc-test.vcxproj rename to 3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-test.vcxproj index 506dd7d4..1e41fca1 100644 --- a/3rd/mimalloc-2.0.9/ide/vs2022/mimalloc-test.vcxproj +++ b/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc-test.vcxproj @@ -1,10 +1,26 @@ + + Debug + ARM64 + + + Debug + ARM64EC + Debug Win32 + + Release + ARM64 + + + Release + ARM64EC + Release Win32 @@ -23,7 +39,7 @@ {FEF7858F-750E-4C21-A04D-22707CC66878} mimalloctest 10.0 - mimalloc-test + mimalloc-test-static @@ -42,12 +58,34 @@ true v143 + + Application + true + v143 + + + Application + true + v143 + Application false v143 true + + Application + false + v143 + true + + + Application + false + v143 + true + @@ -62,9 +100,21 @@ + + + + + + + + + + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ @@ -78,10 +128,29 @@ $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + + + false + Level3 @@ -108,6 +177,32 @@ Console + + + Level3 + Disabled + true + true + ..\..\include + stdcpp17 + + + Console + + + + + Level3 + Disabled + true + true + ..\..\include + stdcpp17 + + + Console + + Level3 @@ -144,13 +239,49 @@ Console + + + Level3 + MaxSpeed + true + true + true + true + ..\..\include + _MBCS;%(PreprocessorDefinitions);NDEBUG + stdcpp17 + + + true + true + Console + + + + + Level3 + MaxSpeed + true + true + true + true + ..\..\include + _MBCS;%(PreprocessorDefinitions);NDEBUG + stdcpp17 + + + true + true + Console + + - - {abb5eae7-b3e6-432e-b636-333449892ea6} - + - + + {abb5eae7-b3e6-432e-b636-333449892ea6} + diff --git a/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc.sln b/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc.sln new file mode 100644 index 00000000..212b7515 --- /dev/null +++ b/3rd/mimalloc-3.2.6/ide/vs2022/mimalloc.sln @@ -0,0 +1,151 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.12.35527.113 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-lib", "mimalloc-lib.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA6}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-test-static", "mimalloc-test.vcxproj", "{FEF7858F-750E-4C21-A04D-22707CC66878}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-override-dll", "mimalloc-override-dll.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA7}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = 
"mimalloc-test-override-dep", "mimalloc-override-test-dep.vcxproj", "{FEF7869F-750E-4C21-A04D-22707CC66879}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-test-override", "mimalloc-override-test.vcxproj", "{FEF7868F-750E-4C21-A04D-22707CC66879}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-test-stress", "mimalloc-test-stress.vcxproj", "{FEF7958F-750E-4C21-A04D-22707CC66878}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-test-api", "mimalloc-test-api.vcxproj", "{FFF7958F-750E-4C21-A04D-22707CC66878}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|ARM64 = Debug|ARM64 + Debug|ARM64EC = Debug|ARM64EC + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|ARM64 = Release|ARM64 + Release|ARM64EC = Release|ARM64EC + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|ARM64.Build.0 = Debug|ARM64 + {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|ARM64EC.ActiveCfg = Debug|ARM64EC + {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|ARM64EC.Build.0 = Debug|ARM64EC + {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x64.ActiveCfg = Debug|x64 + {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x64.Build.0 = Debug|x64 + {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x86.ActiveCfg = Debug|Win32 + {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Debug|x86.Build.0 = Debug|Win32 + {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|ARM64.ActiveCfg = Release|ARM64 + {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|ARM64.Build.0 = Release|ARM64 + {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|ARM64EC.ActiveCfg = Release|ARM64EC + {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|ARM64EC.Build.0 = Release|ARM64EC + {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x64.ActiveCfg = Release|x64 + {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x64.Build.0 = Release|x64 + {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x86.ActiveCfg = Release|Win32 + {ABB5EAE7-B3E6-432E-B636-333449892EA6}.Release|x86.Build.0 = Release|Win32 + {FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|ARM64.Build.0 = Debug|ARM64 + {FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|ARM64EC.ActiveCfg = Debug|ARM64EC + {FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|ARM64EC.Build.0 = Debug|ARM64EC + {FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x64.ActiveCfg = Debug|x64 + {FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x64.Build.0 = Debug|x64 + {FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x86.ActiveCfg = Debug|Win32 + {FEF7858F-750E-4C21-A04D-22707CC66878}.Debug|x86.Build.0 = Debug|Win32 + {FEF7858F-750E-4C21-A04D-22707CC66878}.Release|ARM64.ActiveCfg = Release|ARM64 + {FEF7858F-750E-4C21-A04D-22707CC66878}.Release|ARM64.Build.0 = Release|ARM64 + {FEF7858F-750E-4C21-A04D-22707CC66878}.Release|ARM64EC.ActiveCfg = Release|ARM64EC + {FEF7858F-750E-4C21-A04D-22707CC66878}.Release|ARM64EC.Build.0 = Release|ARM64EC + {FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x64.ActiveCfg = Release|x64 + {FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x64.Build.0 = Release|x64 + {FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x86.ActiveCfg = Release|Win32 + {FEF7858F-750E-4C21-A04D-22707CC66878}.Release|x86.Build.0 = Release|Win32 + {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|ARM64.ActiveCfg = Debug|ARM64 + 
{ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|ARM64.Build.0 = Debug|ARM64 + {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|ARM64EC.ActiveCfg = Debug|ARM64EC + {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|ARM64EC.Build.0 = Debug|ARM64EC + {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x64.ActiveCfg = Debug|x64 + {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x64.Build.0 = Debug|x64 + {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x86.ActiveCfg = Debug|Win32 + {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Debug|x86.Build.0 = Debug|Win32 + {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|ARM64.ActiveCfg = Release|ARM64 + {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|ARM64.Build.0 = Release|ARM64 + {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|ARM64EC.ActiveCfg = Release|ARM64EC + {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|ARM64EC.Build.0 = Release|ARM64EC + {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x64.ActiveCfg = Release|x64 + {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x64.Build.0 = Release|x64 + {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x86.ActiveCfg = Release|Win32 + {ABB5EAE7-B3E6-432E-B636-333449892EA7}.Release|x86.Build.0 = Release|Win32 + {FEF7869F-750E-4C21-A04D-22707CC66879}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {FEF7869F-750E-4C21-A04D-22707CC66879}.Debug|ARM64.Build.0 = Debug|ARM64 + {FEF7869F-750E-4C21-A04D-22707CC66879}.Debug|ARM64EC.ActiveCfg = Debug|ARM64EC + {FEF7869F-750E-4C21-A04D-22707CC66879}.Debug|ARM64EC.Build.0 = Debug|ARM64EC + {FEF7869F-750E-4C21-A04D-22707CC66879}.Debug|x64.ActiveCfg = Debug|x64 + {FEF7869F-750E-4C21-A04D-22707CC66879}.Debug|x64.Build.0 = Debug|x64 + {FEF7869F-750E-4C21-A04D-22707CC66879}.Debug|x86.ActiveCfg = Debug|Win32 + {FEF7869F-750E-4C21-A04D-22707CC66879}.Debug|x86.Build.0 = Debug|Win32 + {FEF7869F-750E-4C21-A04D-22707CC66879}.Release|ARM64.ActiveCfg = Release|ARM64 + {FEF7869F-750E-4C21-A04D-22707CC66879}.Release|ARM64.Build.0 = Release|ARM64 + {FEF7869F-750E-4C21-A04D-22707CC66879}.Release|ARM64EC.ActiveCfg = Release|ARM64EC + {FEF7869F-750E-4C21-A04D-22707CC66879}.Release|ARM64EC.Build.0 = Release|ARM64EC + {FEF7869F-750E-4C21-A04D-22707CC66879}.Release|x64.ActiveCfg = Release|x64 + {FEF7869F-750E-4C21-A04D-22707CC66879}.Release|x64.Build.0 = Release|x64 + {FEF7869F-750E-4C21-A04D-22707CC66879}.Release|x86.ActiveCfg = Release|Win32 + {FEF7869F-750E-4C21-A04D-22707CC66879}.Release|x86.Build.0 = Release|Win32 + {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|ARM64.Build.0 = Debug|ARM64 + {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|ARM64EC.ActiveCfg = Debug|ARM64EC + {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|ARM64EC.Build.0 = Debug|ARM64EC + {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x64.ActiveCfg = Debug|x64 + {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x64.Build.0 = Debug|x64 + {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x86.ActiveCfg = Debug|Win32 + {FEF7868F-750E-4C21-A04D-22707CC66879}.Debug|x86.Build.0 = Debug|Win32 + {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|ARM64.ActiveCfg = Release|ARM64 + {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|ARM64.Build.0 = Release|ARM64 + {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|ARM64EC.ActiveCfg = Release|ARM64EC + {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|ARM64EC.Build.0 = Release|ARM64EC + {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x64.ActiveCfg = Release|x64 + {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x64.Build.0 = Release|x64 + {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x86.ActiveCfg = 
Release|Win32 + {FEF7868F-750E-4C21-A04D-22707CC66879}.Release|x86.Build.0 = Release|Win32 + {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|ARM64.Build.0 = Debug|ARM64 + {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|ARM64EC.ActiveCfg = Debug|ARM64EC + {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|ARM64EC.Build.0 = Debug|ARM64EC + {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.ActiveCfg = Debug|x64 + {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.Build.0 = Debug|x64 + {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.ActiveCfg = Debug|Win32 + {FEF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.Build.0 = Debug|Win32 + {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|ARM64.ActiveCfg = Release|ARM64 + {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|ARM64.Build.0 = Release|ARM64 + {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|ARM64EC.ActiveCfg = Release|ARM64EC + {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|ARM64EC.Build.0 = Release|ARM64EC + {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.ActiveCfg = Release|x64 + {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.Build.0 = Release|x64 + {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.ActiveCfg = Release|Win32 + {FEF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.Build.0 = Release|Win32 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|ARM64.Build.0 = Debug|ARM64 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|ARM64EC.ActiveCfg = Debug|ARM64EC + {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|ARM64EC.Build.0 = Debug|ARM64EC + {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.ActiveCfg = Debug|x64 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|x64.Build.0 = Debug|x64 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.ActiveCfg = Debug|Win32 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Debug|x86.Build.0 = Debug|Win32 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|ARM64.ActiveCfg = Release|ARM64 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|ARM64.Build.0 = Release|ARM64 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|ARM64EC.ActiveCfg = Release|ARM64EC + {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|ARM64EC.Build.0 = Release|ARM64EC + {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.ActiveCfg = Release|x64 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|x64.Build.0 = Release|x64 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.ActiveCfg = Release|Win32 + {FFF7958F-750E-4C21-A04D-22707CC66878}.Release|x86.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {4297F93D-486A-4243-995F-7D32F59AE82A} + EndGlobalSection +EndGlobal diff --git a/3rd/mimalloc-2.0.9/include/mimalloc-new-delete.h b/3rd/mimalloc-3.2.6/include/mimalloc-new-delete.h similarity index 100% rename from 3rd/mimalloc-2.0.9/include/mimalloc-new-delete.h rename to 3rd/mimalloc-3.2.6/include/mimalloc-new-delete.h diff --git a/3rd/mimalloc-2.0.9/include/mimalloc-override.h b/3rd/mimalloc-3.2.6/include/mimalloc-override.h similarity index 96% rename from 3rd/mimalloc-2.0.9/include/mimalloc-override.h rename to 3rd/mimalloc-3.2.6/include/mimalloc-override.h index c63b0b91..48a8a622 100644 --- a/3rd/mimalloc-2.0.9/include/mimalloc-override.h +++ b/3rd/mimalloc-3.2.6/include/mimalloc-override.h @@ -24,7 +24,7 @@ not accidentally mix pointers from different allocators). 
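For reviewers unfamiliar with this header: a minimal sketch of how the macro-based override is typically consumed (illustrative only, not part of the patch; `copy_name` is a made-up function and the include order is one reasonable choice):

```
#include <string.h>
#include <mimalloc.h>
#include <mimalloc-override.h>   /* from here on, malloc/free/strdup/... expand to the mi_ variants */

char* copy_name(const char* s) {
  char* d = strdup(s);   /* expands to mi_strdup(s) via the macros in the hunk below */
  return d;              /* the caller's eventual free(d) expands to mi_free(d) */
}
```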
#define free(p) mi_free(p) #define strdup(s) mi_strdup(s) -#define strndup(s,n) mi_strndup(s,n) +#define strndup(s,n) mi_strndup(s,n) #define realpath(f,n) mi_realpath(f,n) // Microsoft extensions @@ -43,6 +43,7 @@ not accidentally mix pointers from different allocators). #define reallocf(p,n) mi_reallocf(p,n) #define malloc_size(p) mi_usable_size(p) #define malloc_usable_size(p) mi_usable_size(p) +#define malloc_good_size(sz) mi_malloc_good_size(sz) #define cfree(p) mi_free(p) #define valloc(n) mi_valloc(n) diff --git a/3rd/mimalloc-3.2.6/include/mimalloc-stats.h b/3rd/mimalloc-3.2.6/include/mimalloc-stats.h new file mode 100644 index 00000000..03452f2d --- /dev/null +++ b/3rd/mimalloc-3.2.6/include/mimalloc-stats.h @@ -0,0 +1,149 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2024-2025, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_STATS_H +#define MIMALLOC_STATS_H + +#include +#include + +#define MI_STAT_VERSION 3 // increased on every backward incompatible change + +// alignment for atomic fields +#if defined(_MSC_VER) +#define mi_decl_align(a) __declspec(align(a)) +#elif defined(__GNUC__) +#define mi_decl_align(a) __attribute__((aligned(a))) +#elif __cplusplus >= 201103L +#define mi_decl_align(a) alignas(a) +#else +#define mi_decl_align(a) +#endif + + +// count allocation over time +typedef struct mi_stat_count_s { + int64_t total; // total allocated + int64_t peak; // peak allocation + int64_t current; // current allocation +} mi_stat_count_t; + +// counters only increase +typedef struct mi_stat_counter_s { + int64_t total; // total count +} mi_stat_counter_t; + +#define MI_STAT_FIELDS() \ + MI_STAT_COUNT(pages) /* count of mimalloc pages */ \ + MI_STAT_COUNT(reserved) /* reserved memory bytes */ \ + MI_STAT_COUNT(committed) /* committed bytes */ \ + MI_STAT_COUNTER(reset) /* reset bytes */ \ + MI_STAT_COUNTER(purged) /* purged bytes */ \ + MI_STAT_COUNT(page_committed) /* committed memory inside pages */ \ + MI_STAT_COUNT(pages_abandoned) /* abandonded pages count */ \ + MI_STAT_COUNT(threads) /* number of threads */ \ + MI_STAT_COUNT(malloc_normal) /* allocated bytes <= MI_LARGE_OBJ_SIZE_MAX */ \ + MI_STAT_COUNT(malloc_huge) /* allocated bytes in huge pages */ \ + MI_STAT_COUNT(malloc_requested) /* malloc requested bytes */ \ + \ + MI_STAT_COUNTER(mmap_calls) \ + MI_STAT_COUNTER(commit_calls) \ + MI_STAT_COUNTER(reset_calls) \ + MI_STAT_COUNTER(purge_calls) \ + MI_STAT_COUNTER(arena_count) /* number of memory arena's */ \ + MI_STAT_COUNTER(malloc_normal_count) /* number of blocks <= MI_LARGE_OBJ_SIZE_MAX */ \ + MI_STAT_COUNTER(malloc_huge_count) /* number of huge bloks */ \ + MI_STAT_COUNTER(malloc_guarded_count) /* number of allocations with guard pages */ \ + \ + /* internal statistics */ \ + MI_STAT_COUNTER(arena_rollback_count) \ + MI_STAT_COUNTER(arena_purges) \ + MI_STAT_COUNTER(pages_extended) /* number of page extensions */ \ + MI_STAT_COUNTER(pages_retire) /* number of pages that are retired */ \ + MI_STAT_COUNTER(page_searches) /* total pages searched for a fresh page */ \ + MI_STAT_COUNTER(page_searches_count) /* searched count for a fresh page */ \ + /* only on v1 and v2 */ \ + MI_STAT_COUNT(segments) \ + 
MI_STAT_COUNT(segments_abandoned) \ + MI_STAT_COUNT(segments_cache) \ + MI_STAT_COUNT(_segments_reserved) \ + /* only on v3 */ \ + MI_STAT_COUNT(heaps) \ + MI_STAT_COUNTER(pages_reclaim_on_alloc) \ + MI_STAT_COUNTER(pages_reclaim_on_free) \ + MI_STAT_COUNTER(pages_reabandon_full) \ + MI_STAT_COUNTER(pages_unabandon_busy_wait) + +// Size bins for chunks +typedef enum mi_chunkbin_e { + MI_CBIN_SMALL, // slice_count == 1 + MI_CBIN_OTHER, // slice_count: any other from the other bins, and 1 <= slice_count <= MI_BCHUNK_BITS + MI_CBIN_MEDIUM, // slice_count == 8 + MI_CBIN_LARGE, // slice_count == MI_SIZE_BITS (only used if MI_ENABLE_LARGE_PAGES is 1) + MI_CBIN_HUGE, // slice_count > MI_BCHUNK_BITS + MI_CBIN_NONE, // no bin assigned yet (the chunk is completely free) + MI_CBIN_COUNT +} mi_chunkbin_t; + + +// Define the statistics structure +#define MI_BIN_HUGE (73U) // see types.h +#define MI_STAT_COUNT(stat) mi_stat_count_t stat; +#define MI_STAT_COUNTER(stat) mi_stat_counter_t stat; + +typedef struct mi_stats_s +{ + int version; + + mi_decl_align(8) MI_STAT_FIELDS() + + // future extension + mi_stat_count_t _stat_reserved[4]; + mi_stat_counter_t _stat_counter_reserved[4]; + + // size segregated statistics + mi_stat_count_t malloc_bins[MI_BIN_HUGE+1]; // allocation per size bin + mi_stat_count_t page_bins[MI_BIN_HUGE+1]; // pages allocated per size bin + mi_stat_count_t chunk_bins[MI_CBIN_COUNT]; // chunks per page sizes +} mi_stats_t; + +#undef MI_STAT_COUNT +#undef MI_STAT_COUNTER + + +// Exported definitions +#ifdef __cplusplus +extern "C" { +#endif + +// stats from a heap +mi_decl_export void mi_heap_stats_get(mi_heap_t* heap, size_t stats_size, mi_stats_t* stats) mi_attr_noexcept; +mi_decl_export char* mi_heap_stats_get_json(mi_heap_t* heap, size_t buf_size, char* buf) mi_attr_noexcept; // use mi_free to free the result if the input buf == NULL +mi_decl_export void mi_heap_stats_print_out(mi_heap_t* heap, mi_output_fun* out, void* arg) mi_attr_noexcept; +mi_decl_export void mi_heap_stats_merge_to_subproc(mi_heap_t* heap); + +// subprocess stats +mi_decl_export void mi_subproc_stats_get(mi_subproc_id_t subproc_id, size_t stats_size, mi_stats_t* stats) mi_attr_noexcept; +mi_decl_export char* mi_subproc_stats_get_json(mi_subproc_id_t subproc_id, size_t buf_size, char* buf) mi_attr_noexcept; // use mi_free to free the result if the input buf == NULL +mi_decl_export void mi_subproc_stats_print_out(mi_subproc_id_t subproc_id, mi_output_fun* out, void* arg) mi_attr_noexcept; + +// subprocess and its heap stats segregated +mi_decl_export void mi_subproc_heap_stats_print_out(mi_subproc_id_t subproc_id, mi_output_fun* out, void* arg) mi_attr_noexcept; + +// stats aggregated for the current subprocess and all its heaps. 
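As a quick orientation on this new statistics header, a hedged usage sketch (not part of the patch; it relies only on declarations from this file, with `mi_stats_get` and `mi_stats_print_out` following just below):

```
#include <mimalloc.h>
#include <mimalloc-stats.h>
#include <stdio.h>

int main(void) {
  void* p = mi_malloc(64 * 1024);
  mi_free(p);

  mi_stats_t st;
  mi_stats_get(sizeof(st), &st);      /* aggregated stats for the current subprocess */
  printf("stats v%d: peak committed = %lld bytes\n",
         st.version, (long long)st.committed.peak);

  mi_stats_print_out(NULL, NULL);     /* a NULL `out` is assumed to fall back to the default sink */
  return 0;
}
```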
+mi_decl_export void mi_stats_get(size_t stats_size, mi_stats_t* stats) mi_attr_noexcept; +mi_decl_export char* mi_stats_get_json(size_t buf_size, char* buf) mi_attr_noexcept; // use mi_free to free the result if the input buf == NULL +mi_decl_export void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; + + +mi_decl_export size_t mi_stats_get_bin_size(size_t bin) mi_attr_noexcept; + +#ifdef __cplusplus +} +#endif + +#endif // MIMALLOC_STATS_H diff --git a/3rd/mimalloc-2.0.9/include/mimalloc.h b/3rd/mimalloc-3.2.6/include/mimalloc.h similarity index 61% rename from 3rd/mimalloc-2.0.9/include/mimalloc.h rename to 3rd/mimalloc-3.2.6/include/mimalloc.h index 9b72fbfd..3e9f2da8 100644 --- a/3rd/mimalloc-2.0.9/include/mimalloc.h +++ b/3rd/mimalloc-3.2.6/include/mimalloc.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2022, Microsoft Research, Daan Leijen +Copyright (c) 2018-2026, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 209 // major + 2 digits minor +#define MI_MALLOC_VERSION 3206 // major + minor + 2 digits patch // ------------------------------------------------------ // Compiler specific attributes @@ -97,7 +97,6 @@ terms of the MIT license. A copy of the license can be found in the file #include // size_t #include // bool -#include // INTPTR_MAX #ifdef __cplusplus extern "C" { @@ -118,7 +117,7 @@ mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_strndup(const char* s mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept mi_attr_malloc; // ------------------------------------------------------ -// Extended functionality +// Extended allocation functions // ------------------------------------------------------ #define MI_SMALL_WSIZE_MAX (128) #define MI_SMALL_SIZE_MAX (MI_SMALL_WSIZE_MAX*sizeof(void*)) @@ -135,8 +134,44 @@ mi_decl_nodiscard mi_decl_export size_t mi_usable_size(const void* p) mi_attr_no mi_decl_nodiscard mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; +// ------------------------------------------------------------------------------------- +// Aligned allocation +// Note that `alignment` always follows `size` for consistency with unaligned +// allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`. 
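A short illustration (not part of the patch) of the argument-order difference the note above calls out:

```
#include <stdlib.h>
#include <mimalloc.h>

void order_example(void) {
  void* a = mi_malloc_aligned(1024, 64);   /* mimalloc: size first, then alignment */
  void* b = aligned_alloc(64, 1024);       /* C11:      alignment first, then size */
  mi_free(a);
  free(b);
}
```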
+// ------------------------------------------------------------------------------------- + +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1, 2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1, 2); +mi_decl_nodiscard mi_decl_export void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(2); + + +// ------------------------------------------------------ +// Typed allocation, the type is always the first parameter +// ------------------------------------------------------ + +#define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp))) +#define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp))) +#define mi_calloc_tp(tp,n) ((tp*)mi_calloc(n,sizeof(tp))) +#define mi_mallocn_tp(tp,n) ((tp*)mi_mallocn(n,sizeof(tp))) +#define mi_reallocn_tp(tp,p,n) ((tp*)mi_reallocn(p,n,sizeof(tp))) +#define mi_recalloc_tp(tp,p,n) ((tp*)mi_recalloc(p,n,sizeof(tp))) + +#define mi_heap_malloc_tp(tp,hp) ((tp*)mi_heap_malloc(hp,sizeof(tp))) +#define mi_heap_zalloc_tp(tp,hp) ((tp*)mi_heap_zalloc(hp,sizeof(tp))) +#define mi_heap_calloc_tp(tp,hp,n) ((tp*)mi_heap_calloc(hp,n,sizeof(tp))) +#define mi_heap_mallocn_tp(tp,hp,n) ((tp*)mi_heap_mallocn(hp,n,sizeof(tp))) +#define mi_heap_reallocn_tp(tp,hp,p,n) ((tp*)mi_heap_reallocn(hp,p,n,sizeof(tp))) +#define mi_heap_recalloc_tp(tp,hp,p,n) ((tp*)mi_heap_recalloc(hp,p,n,sizeof(tp))) + + // ------------------------------------------------------ // Internals +// See also `mimalloc-stats.h` for statistics // ------------------------------------------------------ typedef void (mi_cdecl mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg); @@ -148,62 +183,70 @@ mi_decl_export void mi_register_output(mi_output_fun* out, void* arg) mi_attr_no typedef void (mi_cdecl mi_error_fun)(int err, void* arg); mi_decl_export void mi_register_error(mi_error_fun* fun, void* arg); -mi_decl_export void mi_collect(bool force) mi_attr_noexcept; -mi_decl_export int mi_version(void) mi_attr_noexcept; -mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; -mi_decl_export void mi_stats_merge(void) mi_attr_noexcept; -mi_decl_export void mi_stats_print(void* out) mi_attr_noexcept; // backward compatibility: `out` is ignored and should be NULL -mi_decl_export void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; +mi_decl_export void mi_collect(bool 
force) mi_attr_noexcept; +mi_decl_export int mi_version(void) mi_attr_noexcept; +mi_decl_export void mi_options_print(void) mi_attr_noexcept; +mi_decl_export void mi_process_info_print(void) mi_attr_noexcept; +mi_decl_export void mi_options_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; +mi_decl_export void mi_process_info_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; +mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, + size_t* current_rss, size_t* peak_rss, + size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept; + + +// Generally do not use the following as these are usually called automatically mi_decl_export void mi_process_init(void) mi_attr_noexcept; +mi_decl_export void mi_cdecl mi_process_done(void) mi_attr_noexcept; mi_decl_export void mi_thread_init(void) mi_attr_noexcept; mi_decl_export void mi_thread_done(void) mi_attr_noexcept; -mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; +mi_decl_export void mi_thread_set_in_threadpool(void) mi_attr_noexcept; // communicate that a thread is in a threadpool -mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, - size_t* current_rss, size_t* peak_rss, - size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept; -// ------------------------------------------------------------------------------------- -// Aligned allocation -// Note that `alignment` always follows `size` for consistency with unaligned -// allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`. -// ------------------------------------------------------------------------------------- +// ----------------------------------------------------------------- +// Return allocated block size (if the return value is not NULL) +// ----------------------------------------------------------------- -mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); -mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); -mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2) mi_attr_alloc_align(3); -mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); -mi_decl_nodiscard mi_decl_export void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(2) mi_attr_alloc_align(3); -mi_decl_nodiscard mi_decl_export void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_umalloc(size_t size, size_t* block_size) mi_attr_noexcept 
mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_ucalloc(size_t count, size_t size, size_t* block_size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_nodiscard mi_decl_export void* mi_urealloc(void* p, size_t newsize, size_t* block_size_pre, size_t* block_size_post) mi_attr_noexcept mi_attr_alloc_size(2); +mi_decl_export void mi_ufree(void* p, size_t* block_size) mi_attr_noexcept; + +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_umalloc_aligned(size_t size, size_t alignment, size_t* block_size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_uzalloc_aligned(size_t size, size_t alignment, size_t* block_size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); + +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_umalloc_small(size_t size, size_t* block_size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_uzalloc_small(size_t size, size_t* block_size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); // ------------------------------------------------------------------------------------- -// Heaps: first-class, but can only allocate from the same thread that created it. +// Heaps: first-class. Can allocate from any thread (and be free'd from any thread) +// Heaps keep allocations in separate pages from each other (but share the arena's and free'd pages) // ------------------------------------------------------------------------------------- struct mi_heap_s; typedef struct mi_heap_s mi_heap_t; mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new(void); -mi_decl_export void mi_heap_delete(mi_heap_t* heap); -mi_decl_export void mi_heap_destroy(mi_heap_t* heap); -mi_decl_export mi_heap_t* mi_heap_set_default(mi_heap_t* heap); -mi_decl_export mi_heap_t* mi_heap_get_default(void); -mi_decl_export mi_heap_t* mi_heap_get_backing(void); -mi_decl_export void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept; - -mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_export void mi_heap_delete(mi_heap_t* heap); // move live blocks to the main heap +mi_decl_export void mi_heap_destroy(mi_heap_t* heap); // free all live blocks +mi_decl_export void mi_heap_set_numa_affinity(mi_heap_t* heap, int numa_node); +mi_decl_export void mi_heap_collect(mi_heap_t* heap, bool force); + +mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_main(void); +mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_of(void* p); +mi_decl_nodiscard mi_decl_export bool mi_heap_contains(mi_heap_t* heap, void* p); +mi_decl_nodiscard mi_decl_export bool mi_any_heap_contains(void* p); + +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc(mi_heap_t* theap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard 
mi_decl_export mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); -mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* theap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); mi_decl_nodiscard mi_decl_export void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3); -mi_decl_nodiscard mi_decl_export void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4); -mi_decl_nodiscard mi_decl_export void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3); +mi_decl_nodiscard mi_decl_export void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3, 4); +mi_decl_nodiscard mi_decl_export void* mi_heap_reallocf(mi_heap_t* theap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3); mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept mi_attr_malloc; mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept mi_attr_malloc; @@ -235,81 +278,149 @@ mi_decl_nodiscard mi_decl_export void* mi_recalloc_aligned(void* p, size_t newco mi_decl_nodiscard mi_decl_export void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size2(2,3); mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3); -mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4); +mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3, 4); mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(3) mi_attr_alloc_align(4); mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(3); -mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_alloc_size2(3,4) mi_attr_alloc_align(5); -mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size2(3,4); +mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_alloc_size2(3, 4) mi_attr_alloc_align(5); +mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept 
mi_attr_alloc_size2(3, 4); + // ------------------------------------------------------ -// Analysis +// Visiting pages and individual blocks in a heap. // ------------------------------------------------------ -mi_decl_export bool mi_heap_contains_block(mi_heap_t* heap, const void* p); -mi_decl_export bool mi_heap_check_owned(mi_heap_t* heap, const void* p); -mi_decl_export bool mi_check_owned(const void* p); - // An area of heap space contains blocks of a single size. typedef struct mi_heap_area_s { - void* blocks; // start of the area containing heap blocks + void* blocks; // start of the area containing theap blocks size_t reserved; // bytes reserved for this area (virtual) size_t committed; // current available bytes for this area size_t used; // number of allocated blocks size_t block_size; // size in bytes of each block size_t full_block_size; // size in bytes of a full block including padding and metadata. + void* reserved1; // internal } mi_heap_area_t; typedef bool (mi_cdecl mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg); -mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg); +mi_decl_export bool mi_heap_visit_blocks(mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg); +mi_decl_export bool mi_heap_visit_abandoned_blocks(mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg); + + +// ------------------------------------------------------ +// Arena memory management +// Arena's are larger memory area's provided by the OS or user +// ------------------------------------------------------ -// Experimental -mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export bool mi_is_redirected(void) mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; -mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept; -mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept; +mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept; +mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_pinned /* cannot decommit/reset? 
*/, bool is_zero, int numa_node) mi_attr_noexcept; -mi_decl_export void mi_debug_show_arenas(void) mi_attr_noexcept; +mi_decl_export void mi_debug_show_arenas(void) mi_attr_noexcept; +mi_decl_export void mi_arenas_print(void) mi_attr_noexcept; +mi_decl_export size_t mi_arena_min_alignment(void); -// Experimental: heaps associated with specific memory arena's -typedef int mi_arena_id_t; -mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size); -mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; -mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; -mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; +typedef void* mi_arena_id_t; +mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size); +mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; +mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; +mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_pinned, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; +mi_decl_export bool mi_arena_contains(mi_arena_id_t arena_id, const void* p); -#if MI_MALLOC_VERSION >= 200 // Create a heap that only allocates in the specified arena mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id); -#endif -// deprecated -mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; + +// ------------------------------------------------------ +// Subprocesses +// Advanced: allow sub-processes whose memory arena's stay fully separated (and no reclamation between them). +// Used for example for separate interpreters in one process. +// ------------------------------------------------------ + +typedef void* mi_subproc_id_t; +mi_decl_export mi_subproc_id_t mi_subproc_main(void); +mi_decl_export mi_subproc_id_t mi_subproc_current(void); +mi_decl_export mi_subproc_id_t mi_subproc_new(void); +mi_decl_export void mi_subproc_destroy(mi_subproc_id_t subproc); +mi_decl_export void mi_subproc_add_current_thread(mi_subproc_id_t subproc); // this should be called right after a thread is created (and no allocation has taken place yet) + +typedef bool (mi_cdecl mi_heap_visit_fun)(mi_heap_t* heap, void* arg); +mi_decl_export bool mi_subproc_visit_heaps(mi_subproc_id_t subproc, mi_heap_visit_fun* visitor, void* arg); + + +// ------------------------------------------------------------------------------------- +// A "theap" is a thread-local heap. This API is only provided for special circumstances like runtimes +// that already have a thread-local context and can store the theap there for (slightly) faster allocations. +// This also allows to set a default theap for the current thread so that `malloc` etc. allocate from +// that theap (instead of the main (t)heap). +// Theaps are first-class, but can only allocate from the same thread that created it. 
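A rough sketch (hypothetical wiring, not part of the patch) of the sub-process API above: one worker thread is attached to its own sub-process so its memory stays separate from the rest of the program. The theap description continues below.

```
#include <mimalloc.h>
#include <pthread.h>

static mi_subproc_id_t interp_subproc;             /* hypothetical: one embedded interpreter */

static void* interp_main(void* arg) {
  mi_subproc_add_current_thread(interp_subproc);   /* must run before any allocation on this thread */
  void* p = mi_malloc(128);                        /* assumed to be served from the sub-process' own arenas */
  mi_free(p);
  return arg;
}

void run_isolated_interpreter(void) {
  interp_subproc = mi_subproc_new();
  pthread_t t;
  pthread_create(&t, NULL, interp_main, NULL);
  pthread_join(t, NULL);
  mi_subproc_destroy(interp_subproc);
}
```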
+// Allocation through a `theap` may be a tiny bit faster than using plain malloc +// (as we don't need to lookup the thread local variable). +// ------------------------------------------------------------------------------------- + +struct mi_theap_s; +typedef struct mi_theap_s mi_theap_t; + +mi_decl_export mi_theap_t* mi_heap_theap(mi_heap_t* heap); +mi_decl_export mi_theap_t* mi_theap_set_default(mi_theap_t* theap); +mi_decl_export mi_theap_t* mi_theap_get_default(void); +mi_decl_export void mi_theap_collect(mi_theap_t* theap, bool force) mi_attr_noexcept; + +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_theap_malloc(mi_theap_t* theap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_theap_zalloc(mi_theap_t* theap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_theap_calloc(mi_theap_t* theap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_theap_malloc_small(mi_theap_t* theap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_theap_malloc_aligned(mi_theap_t* theap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export void* mi_theap_realloc(mi_theap_t* theap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3); // ------------------------------------------------------ -// Convenience +// Experimental // ------------------------------------------------------ -#define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp))) -#define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp))) -#define mi_calloc_tp(tp,n) ((tp*)mi_calloc(n,sizeof(tp))) -#define mi_mallocn_tp(tp,n) ((tp*)mi_mallocn(n,sizeof(tp))) -#define mi_reallocn_tp(p,tp,n) ((tp*)mi_reallocn(p,n,sizeof(tp))) -#define mi_recalloc_tp(p,tp,n) ((tp*)mi_recalloc(p,n,sizeof(tp))) +// Experimental: objects followed by a guard page. +// Setting the sample rate on a specific theap can be used to test parts of the program more +// specifically (in combination with `mi_theap_set_default`). +// A sample rate of 0 disables guarded objects, while 1 uses a guard page for every object. +// A seed of 0 uses a random start point. Only objects within the size bound are eligable for guard pages. 
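A hypothetical sketch of the theap pattern described above, for a runtime that already keeps its own per-thread context (`rt_thread_ctx_t` is invented for illustration); the guarded-object declarations continue right after it:

```
#include <mimalloc.h>

typedef struct rt_thread_ctx_s {
  mi_theap_t* theap;                 /* cached once per thread */
  /* ... other per-thread runtime state ... */
} rt_thread_ctx_t;

void rt_thread_ctx_init(rt_thread_ctx_t* ctx) {
  ctx->theap = mi_theap_get_default();
}

void* rt_alloc(rt_thread_ctx_t* ctx, size_t n) {
  return mi_theap_malloc(ctx->theap, n);   /* skips the thread-local lookup inside mimalloc */
}
```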
+mi_decl_export void mi_theap_guarded_set_sample_rate(mi_theap_t* theap, size_t sample_rate, size_t seed); +mi_decl_export void mi_theap_guarded_set_size_bound(mi_theap_t* theap, size_t min, size_t max); + +// very experimental +typedef bool (mi_cdecl mi_commit_fun_t)(bool commit, void* start, size_t size, bool* is_zero, void* user_arg); +mi_decl_export bool mi_manage_memory(void* start, size_t size, bool is_committed, bool is_pinned, bool is_zero, int numa_node, bool exclusive, + mi_commit_fun_t* commit_fun, void* commit_fun_arg, mi_arena_id_t* arena_id) mi_attr_noexcept; + +//mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* accessed_size, size_t* size); +//mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_commit_fun_t* commit_fun, void* commit_fun_arg, mi_arena_id_t* arena_id); +//mi_decl_export bool mi_theap_reload(mi_theap_t* theap, mi_arena_id_t arena); +//mi_decl_export void mi_theap_unload(mi_theap_t* theap); + + +// ------------------------------------------------------ +// Deprecated +// ------------------------------------------------------ + +mi_decl_export bool mi_check_owned(const void* p); + +mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; +mi_decl_export bool mi_theap_visit_blocks(const mi_theap_t* theap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg); + +mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; +mi_decl_export void mi_collect_reduce(size_t target_thread_owned) mi_attr_noexcept; + +mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; +mi_decl_export void mi_stats_merge(void) mi_attr_noexcept; +mi_decl_export void mi_stats_print(void* out) mi_attr_noexcept; // backward compatibility: `out` is ignored and should be NULL + +mi_decl_export void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; // not deprecated but declared in `mimalloc-stats.h` now. 
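To make the guarded-object knobs above concrete, a small sketch (assumes a build with MI_GUARDED; not part of the patch):

```
#include <mimalloc.h>

void enable_guard_sampling(void) {
  mi_theap_t* theap = mi_theap_get_default();
  mi_theap_guarded_set_sample_rate(theap, 100, 0);         /* ~1 in 100 eligible allocations; seed 0 = random start */
  mi_theap_guarded_set_size_bound(theap, 16, 1024*1024);   /* only objects between 16 B and 1 MiB are eligible */
}
```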
-#define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp))) -#define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp))) -#define mi_heap_calloc_tp(hp,tp,n) ((tp*)mi_heap_calloc(hp,n,sizeof(tp))) -#define mi_heap_mallocn_tp(hp,tp,n) ((tp*)mi_heap_mallocn(hp,n,sizeof(tp))) -#define mi_heap_reallocn_tp(hp,p,tp,n) ((tp*)mi_heap_reallocn(hp,p,n,sizeof(tp))) -#define mi_heap_recalloc_tp(hp,p,tp,n) ((tp*)mi_heap_recalloc(hp,p,n,sizeof(tp))) // ------------------------------------------------------ @@ -318,35 +429,59 @@ mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size typedef enum mi_option_e { // stable options - mi_option_show_errors, - mi_option_show_stats, - mi_option_verbose, - // some of the following options are experimental - // (deprecated options are kept for binary backward compatibility with v1.x versions) - mi_option_eager_commit, - mi_option_deprecated_eager_region_commit, - mi_option_deprecated_reset_decommits, - mi_option_large_os_pages, // use large (2MiB) OS pages, implies eager commit - mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB) at startup - mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node - mi_option_reserve_os_memory, // reserve specified amount of OS memory at startup + mi_option_show_errors, // print error messages + mi_option_show_stats, // print statistics on termination + mi_option_verbose, // print verbose messages + // advanced options + mi_option_deprecated_eager_commit, + mi_option_arena_eager_commit, // eager commit arenas? Use 2 to enable just on overcommit systems (=2) + mi_option_purge_decommits, // should a memory purge decommit? (=1). Set to 0 to use memory reset on a purge (instead of decommit) + mi_option_allow_large_os_pages, // allow use of large (2 or 4 MiB) OS pages, implies eager commit. + mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB pages) at startup + mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node + mi_option_reserve_os_memory, // reserve specified amount of OS memory in an arena at startup (internally, this value is in KiB; use `mi_option_get_size`) mi_option_deprecated_segment_cache, - mi_option_page_reset, - mi_option_abandoned_page_decommit, + mi_option_deprecated_page_reset, + mi_option_deprecated_abandoned_page_purge, mi_option_deprecated_segment_reset, - mi_option_eager_commit_delay, - mi_option_decommit_delay, - mi_option_use_numa_nodes, // 0 = use available numa nodes, otherwise use at most N nodes. - mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only reserved arenas) - mi_option_os_tag, - mi_option_max_errors, - mi_option_max_warnings, - mi_option_max_segment_reclaim, - mi_option_allow_decommit, - mi_option_segment_decommit_delay, - mi_option_decommit_extend_delay, - mi_option_destroy_on_exit, - _mi_option_last + mi_option_deprecated_eager_commit_delay, + mi_option_purge_delay, // memory purging is delayed by N milli seconds; use 0 for immediate purging or -1 for no purging at all. (=10) + mi_option_use_numa_nodes, // 0 = use all available numa nodes, otherwise use at most N nodes. + mi_option_disallow_os_alloc, // 1 = do not use OS memory for allocation (but only programmatically reserved arenas) + mi_option_os_tag, // tag used for OS logging (macOS only for now) (=100) + mi_option_max_errors, // issue at most N error messages + mi_option_max_warnings, // issue at most N warning messages + mi_option_deprecated_max_segment_reclaim, // max. 
percentage of the abandoned segments can be reclaimed per try (=10%) + mi_option_destroy_on_exit, // if set, release all memory on exit; sometimes used for dynamic unloading but can be unsafe + mi_option_arena_reserve, // initial memory size for arena reservation (= 1 GiB on 64-bit) (internally, this value is in KiB; use `mi_option_get_size`) + mi_option_arena_purge_mult, // multiplier for `purge_delay` for the purging delay for arenas (=10) + mi_option_deprecated_purge_extend_delay, + mi_option_disallow_arena_alloc, // 1 = do not use arena's for allocation (except if using specific arena id's) + mi_option_retry_on_oom, // retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries. (only on windows) + mi_option_visit_abandoned, // allow visiting theap blocks from abandoned threads (=0) + mi_option_guarded_min, // only used when building with MI_GUARDED: minimal rounded object size for guarded objects (=0) + mi_option_guarded_max, // only used when building with MI_GUARDED: maximal rounded object size for guarded objects (=0) + mi_option_guarded_precise, // disregard minimal alignment requirement to always place guarded blocks exactly in front of a guard page (=0) + mi_option_guarded_sample_rate, // 1 out of N allocations in the min/max range will be guarded (=1000) + mi_option_guarded_sample_seed, // can be set to allow for a (more) deterministic re-execution when a guard page is triggered (=0) + mi_option_generic_collect, // collect theaps every N (=10000) generic allocation calls + mi_option_page_reclaim_on_free, // reclaim abandoned pages on a free (=0). -1 disallows always, 0 allows if the page originated from the current theap, 1 allows always + mi_option_page_full_retain, // retain N full (small) pages per size class (=2) + mi_option_page_max_candidates, // max candidate pages to consider for allocation (=4) + mi_option_max_vabits, // max user space virtual address bits to consider (=48) + mi_option_pagemap_commit, // commit the full pagemap (to always catch invalid pointer uses) (=0) + mi_option_page_commit_on_demand, // commit page memory on-demand + mi_option_page_max_reclaim, // don't reclaim pages of the same originating theap if we already own N pages (in that size class) (=-1 (unlimited)) + mi_option_page_cross_thread_max_reclaim, // don't reclaim pages across threads if we already own N pages (in that size class) (=16) + mi_option_allow_thp, // allow transparent huge pages? (=1) (on Android =0 by default). Set to 0 to disable THP for the process. + mi_option_minimal_purge_size, // set minimal purge size (in KiB) (=0). By default set to either 64 or 2048 if THP is enabled.
+ _mi_option_last, + // legacy option names + mi_option_large_os_pages = mi_option_allow_large_os_pages, + mi_option_eager_region_commit = mi_option_arena_eager_commit, + mi_option_reset_decommits = mi_option_purge_decommits, + mi_option_reset_delay = mi_option_purge_delay, + mi_option_limit_os_alloc = mi_option_disallow_os_alloc } mi_option_t; @@ -356,8 +491,9 @@ mi_decl_export void mi_option_disable(mi_option_t option); mi_decl_export void mi_option_set_enabled(mi_option_t option, bool enable); mi_decl_export void mi_option_set_enabled_default(mi_option_t option, bool enable); -mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option); -mi_decl_nodiscard mi_decl_export long mi_option_get_clamp(mi_option_t option, long min, long max); +mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option); +mi_decl_nodiscard mi_decl_export long mi_option_get_clamp(mi_option_t option, long min, long max); +mi_decl_nodiscard mi_decl_export size_t mi_option_get_size(mi_option_t option); mi_decl_export void mi_option_set(mi_option_t option, long value); mi_decl_export void mi_option_set_default(mi_option_t option, long value); @@ -365,7 +501,7 @@ mi_decl_export void mi_option_set_default(mi_option_t option, long value); // ------------------------------------------------------------------------------------------------------- // "mi" prefixed implementations of various posix, Unix, Windows, and C++ allocation functions. // (This can be convenient when providing overrides of these functions as done in `mimalloc-override.h`.) -// note: we use `mi_cfree` as "checked free" and it checks if the pointer is in our heap before free-ing. +// note: we use `mi_cfree` as "checked free" and it checks if the pointer is in our theap before free-ing. // ------------------------------------------------------------------------------------------------------- mi_decl_export void mi_cfree(void* p) mi_attr_noexcept; @@ -404,7 +540,7 @@ mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_n(size_t count, s mi_decl_nodiscard mi_decl_export void* mi_new_realloc(void* p, size_t newsize) mi_attr_alloc_size(2); mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_attr_alloc_size2(2, 3); -mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) mi_attr_malloc mi_attr_alloc_size(2); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(2, 3); #ifdef __cplusplus @@ -477,16 +613,18 @@ template bool operator==(const mi_stl_allocator& , const template bool operator!=(const mi_stl_allocator& , const mi_stl_allocator& ) mi_attr_noexcept { return false; } -#if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 +#if (__cplusplus >= 201103L) || (_MSC_VER >= 1900) // C++11 +#define MI_HAS_HEAP_STL_ALLOCATOR 1 + #include // std::shared_ptr -// Common base class for STL allocators in a specific heap -template struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common { +// Common base class for STL allocators in a specific theap +template struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common { using typename _mi_stl_allocator_common::size_type; using typename _mi_stl_allocator_common::value_type; using typename _mi_stl_allocator_common::pointer; - 
_mi_heap_stl_allocator_common(mi_heap_t* hp) : heap(hp) { } /* will not delete nor destroy the passed in heap */ + _mi_heap_stl_allocator_common(mi_heap_t* hp) : heap(hp, [](mi_heap_t*) {}) {} /* will not delete nor destroy the passed in heap */ #if (__cplusplus >= 201703L) // C++17 mi_decl_nodiscard T* allocate(size_type count) { return static_cast(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(T))); } @@ -500,18 +638,18 @@ template struct _mi_heap_stl_allocator_common : public _m #endif void collect(bool force) { mi_heap_collect(this->heap.get(), force); } - template bool is_equal(const _mi_heap_stl_allocator_common& x) const { return (this->heap == x.heap); } + template bool is_equal(const _mi_heap_stl_allocator_common& x) const { return (this->heap == x.heap); } protected: std::shared_ptr heap; template friend struct _mi_heap_stl_allocator_common; - + _mi_heap_stl_allocator_common() { mi_heap_t* hp = mi_heap_new(); - this->heap.reset(hp, (destroy ? &heap_destroy : &heap_delete)); /* calls heap_delete/destroy when the refcount drops to zero */ + this->heap.reset(hp, (_mi_destroy ? &heap_destroy : &heap_delete)); /* calls heap_delete/destroy when the refcount drops to zero */ } _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } - template _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } + template _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } private: static void heap_delete(mi_heap_t* hp) { if (hp != NULL) { mi_heap_delete(hp); } } @@ -522,7 +660,7 @@ template struct _mi_heap_stl_allocator_common : public _m template struct mi_heap_stl_allocator : public _mi_heap_stl_allocator_common { using typename _mi_heap_stl_allocator_common::size_type; mi_heap_stl_allocator() : _mi_heap_stl_allocator_common() { } // creates fresh heap that is deleted when the destructor is called - mi_heap_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common(hp) { } // no delete nor destroy on the passed in heap + mi_heap_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common(hp) { } // no delete nor destroy on the passed in heap template mi_heap_stl_allocator(const mi_heap_stl_allocator& x) mi_attr_noexcept : _mi_heap_stl_allocator_common(x) { } mi_heap_stl_allocator select_on_container_copy_construction() const { return *this; } @@ -539,7 +677,7 @@ template bool operator!=(const mi_heap_stl_allocator& x, template struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_allocator_common { using typename _mi_heap_stl_allocator_common::size_type; mi_heap_destroy_stl_allocator() : _mi_heap_stl_allocator_common() { } // creates fresh heap that is destroyed when the destructor is called - mi_heap_destroy_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common(hp) { } // no delete nor destroy on the passed in heap + mi_heap_destroy_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common(hp) { } // no delete nor destroy on the passed in heap template mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator& x) mi_attr_noexcept : _mi_heap_stl_allocator_common(x) { } mi_heap_destroy_stl_allocator select_on_container_copy_construction() const { return *this; } diff --git a/3rd/mimalloc-2.0.9/include/mimalloc-atomic.h b/3rd/mimalloc-3.2.6/include/mimalloc/atomic.h similarity index 58% rename from 3rd/mimalloc-2.0.9/include/mimalloc-atomic.h rename to 3rd/mimalloc-3.2.6/include/mimalloc/atomic.h index 
c66f8049..699c78b4 100644 --- a/3rd/mimalloc-2.0.9/include/mimalloc-atomic.h +++ b/3rd/mimalloc-3.2.6/include/mimalloc/atomic.h @@ -1,17 +1,28 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021 Microsoft Research, Daan Leijen +Copyright (c) 2018-2024 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #pragma once -#ifndef MIMALLOC_ATOMIC_H -#define MIMALLOC_ATOMIC_H +#ifndef MI_ATOMIC_H +#define MI_ATOMIC_H + +// include windows.h or pthreads.h +#if defined(_WIN32) +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#include <windows.h> +#elif !defined(__wasi__) && (!defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__)) +#define MI_USE_PTHREADS +#include <pthread.h> +#endif // -------------------------------------------------------------------------------------------- // Atomics // We need to be portable between C, C++, and MSVC. -// We base the primitives on the C/C++ atomics and create a mimimal wrapper for MSVC in C compilation mode. +// We base the primitives on the C/C++ atomics and create a minimal wrapper for MSVC in C compilation mode. // This is why we try to use only `uintptr_t` and `*` as atomic types. // To gain better insight in the range of used atomics, we use explicitly named memory order operations // instead of passing the memory order as a parameter. @@ -20,26 +31,34 @@ terms of the MIT license. #if defined(__cplusplus) // Use C++ atomics #include <atomic> -#define _Atomic(tp) std::atomic<tp> -#define mi_atomic(name) std::atomic_##name -#define mi_memory_order(name) std::memory_order_##name -#if !defined(ATOMIC_VAR_INIT) || (__cplusplus >= 202002L) // c++20, see issue #571 - #define MI_ATOMIC_VAR_INIT(x) x +#define _Atomic(tp) std::atomic<tp> +#define mi_atomic(name) std::atomic_##name +#define mi_memory_order(name) std::memory_order_##name +#if (__cplusplus >= 202002L) // c++20, see issue #571 + #define MI_ATOMIC_VAR_INIT(x) x +#elif !defined(ATOMIC_VAR_INIT) + #define MI_ATOMIC_VAR_INIT(x) x #else - #define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) + #define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) #endif #elif defined(_MSC_VER) // Use MSVC C wrapper for C11 atomics -#define _Atomic(tp) tp -#define MI_ATOMIC_VAR_INIT(x) x -#define mi_atomic(name) mi_atomic_##name -#define mi_memory_order(name) mi_memory_order_##name +#define _Atomic(tp) tp +#define MI_ATOMIC_VAR_INIT(x) x +#define mi_atomic(name) mi_atomic_##name +#define mi_memory_order(name) mi_memory_order_##name #else // Use C11 atomics #include <stdatomic.h> -#define mi_atomic(name) atomic_##name -#define mi_memory_order(name) memory_order_##name -#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) +#define mi_atomic(name) atomic_##name +#define mi_memory_order(name) memory_order_##name +#if (__STDC_VERSION__ >= 201710L) // c17, see issue #735 + #define MI_ATOMIC_VAR_INIT(x) x +#elif !defined(ATOMIC_VAR_INIT) + #define MI_ATOMIC_VAR_INIT(x) x +#else + #define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) +#endif #endif // Various defines for all used memory orders in mimalloc @@ -53,18 +72,24 @@ terms of the MIT license.
A copy of the license can be found in the file #define mi_atomic_load_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) #define mi_atomic_store_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) #define mi_atomic_store_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_exchange_relaxed(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(relaxed)) #define mi_atomic_exchange_release(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(release)) #define mi_atomic_exchange_acq_rel(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(acq_rel)) + +#define mi_atomic_cas_weak_relaxed(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(relaxed),mi_memory_order(relaxed)) #define mi_atomic_cas_weak_release(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed)) #define mi_atomic_cas_weak_acq_rel(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire)) +#define mi_atomic_cas_strong_relaxed(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(relaxed),mi_memory_order(relaxed)) #define mi_atomic_cas_strong_release(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed)) #define mi_atomic_cas_strong_acq_rel(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire)) #define mi_atomic_add_relaxed(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(relaxed)) -#define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed)) #define mi_atomic_add_acq_rel(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed)) #define mi_atomic_sub_acq_rel(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_and_relaxed(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(relaxed)) #define mi_atomic_and_acq_rel(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_or_relaxed(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(relaxed)) #define mi_atomic_or_acq_rel(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(acq_rel)) #define mi_atomic_increment_relaxed(p) mi_atomic_add_relaxed(p,(uintptr_t)1) @@ -91,6 +116,8 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub); #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,(tp*)des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,(tp*)des) +#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel(p,exp,(tp*)des) +#define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,(tp*)x) #define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,(tp*)x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,(tp*)x) #else @@ -99,6 +126,8 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub); #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des) +#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel(p,exp,des) +#define 
mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,x) #define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x) #endif @@ -107,24 +136,30 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub); static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) { return mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed)); } +static inline void mi_atomic_void_addi64_relaxed(volatile int64_t* p, const volatile int64_t* padd) { + const int64_t add = mi_atomic_load_relaxed((_Atomic(int64_t)*)padd); + if (add != 0) { + mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed)); + } +} static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { int64_t current = mi_atomic_load_relaxed((_Atomic(int64_t)*)p); while (current < x && !mi_atomic_cas_weak_release((_Atomic(int64_t)*)p, ¤t, x)) { /* nothing */ }; } // Used by timers -#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) -#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) -#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) -#define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) +#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) +#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_casi64_strong_acq_rel(p,e,d) mi_atomic_cas_strong_acq_rel(p,e,d) +#define mi_atomic_addi64_acq_rel(p,i) mi_atomic_add_acq_rel(p,i) #elif defined(_MSC_VER) -// MSVC C compilation wrapper that uses Interlocked operations to model C11 atomics. -#define WIN32_LEAN_AND_MEAN -#include +// Legacy MSVC plain C compilation wrapper that uses Interlocked operations to model C11 atomics. 
#include #ifdef _WIN64 typedef LONG64 msc_intptr_t; @@ -189,7 +224,7 @@ static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_ #else uintptr_t x = *p; if (mo > mi_memory_order_relaxed) { - while (!mi_atomic_compare_exchange_weak_explicit(p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ }; + while (!mi_atomic_compare_exchange_weak_explicit((_Atomic(uintptr_t)*)p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ }; } return x; #endif @@ -238,6 +273,14 @@ static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int6 return current; #endif } + +static inline void mi_atomic_void_addi64_relaxed(volatile int64_t* p, const volatile int64_t* padd) { + const int64_t add = *padd; + if (add != 0) { + mi_atomic_addi64_relaxed((volatile _Atomic(int64_t)*)p, add); + } +} + static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) { int64_t current; do { @@ -245,6 +288,21 @@ static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t } while (current < x && _InterlockedCompareExchange64(p, x, current) != current); } +static inline void mi_atomic_addi64_acq_rel(volatile _Atomic(int64_t*)p, int64_t i) { + mi_atomic_addi64_relaxed(p, i); +} + +static inline bool mi_atomic_casi64_strong_acq_rel(volatile _Atomic(int64_t*)p, int64_t* exp, int64_t des) { + int64_t read = _InterlockedCompareExchange64(p, des, *exp); + if (read == *exp) { + return true; + } + else { + *exp = read; + return false; + } +} + // The pointer macros cast to `uintptr_t`. #define mi_atomic_load_ptr_acquire(tp,p) (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p)) #define mi_atomic_load_ptr_relaxed(tp,p) (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p)) @@ -253,6 +311,8 @@ static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_exchange_ptr_relaxed(tp,p,x) (tp*)mi_atomic_exchange_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x) @@ -275,15 +335,41 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) { return (intptr_t)mi_atomic_addi(p, -sub); } + +// ---------------------------------------------------------------------- +// Once and Guard +// ---------------------------------------------------------------------- + +typedef _Atomic(uintptr_t) mi_atomic_once_t; + +// Returns true only on the first invocation +static inline bool mi_atomic_once( mi_atomic_once_t* once ) { + if (mi_atomic_load_relaxed(once) != 0) return false; // quick test + uintptr_t expected = 0; + return mi_atomic_cas_strong_acq_rel(once, &expected, (uintptr_t)1); // try to set to 1 +} + +typedef _Atomic(uintptr_t) mi_atomic_guard_t; + +// Allows only one thread to execute at a time +#define mi_atomic_guard(guard) \ + 
uintptr_t _mi_guard_expected = 0; \ + for(bool _mi_guard_once = true; \ + _mi_guard_once && mi_atomic_cas_strong_acq_rel(guard,&_mi_guard_expected,(uintptr_t)1); \ + (mi_atomic_store_release(guard,(uintptr_t)0), _mi_guard_once = false) ) + + + +// ---------------------------------------------------------------------- // Yield +// ---------------------------------------------------------------------- + #if defined(__cplusplus) #include static inline void mi_atomic_yield(void) { std::this_thread::yield(); } #elif defined(_WIN32) -#define WIN32_LEAN_AND_MEAN -#include static inline void mi_atomic_yield(void) { YieldProcessor(); } @@ -293,8 +379,9 @@ static inline void mi_atomic_yield(void) { _mm_pause(); } #elif (defined(__GNUC__) || defined(__clang__)) && \ - (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || \ - defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) + (defined(__x86_64__) || defined(__i386__) || \ + defined(__aarch64__) || defined(__arm__) || \ + defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__)) #if defined(__x86_64__) || defined(__i386__) static inline void mi_atomic_yield(void) { __asm__ volatile ("pause" ::: "memory"); @@ -303,19 +390,27 @@ static inline void mi_atomic_yield(void) { static inline void mi_atomic_yield(void) { __asm__ volatile("wfe"); } -#elif (defined(__arm__) && __ARM_ARCH__ >= 7) +#elif defined(__arm__) +#if __ARM_ARCH >= 7 static inline void mi_atomic_yield(void) { __asm__ volatile("yield" ::: "memory"); } -#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) +#else static inline void mi_atomic_yield(void) { - __asm__ __volatile__ ("or 27,27,27" ::: "memory"); + __asm__ volatile ("nop" ::: "memory"); } -#elif defined(__armel__) || defined(__ARMEL__) +#endif +#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__) +#ifdef __APPLE__ static inline void mi_atomic_yield(void) { - __asm__ volatile ("nop" ::: "memory"); + __asm__ volatile ("or r27,r27,r27" ::: "memory"); +} +#else +static inline void mi_atomic_yield(void) { + __asm__ __volatile__ ("or 27,27,27" ::: "memory"); } #endif +#endif #elif defined(__sun) // Fallback for other archs #include @@ -335,4 +430,137 @@ static inline void mi_atomic_yield(void) { #endif -#endif // __MIMALLOC_ATOMIC_H +// ---------------------------------------------------------------------- +// Locks +// These should be light-weight in-process only locks. +// Only used for reserving arena's and to maintain the abandoned list. 
+// ---------------------------------------------------------------------- +#if _MSC_VER +#pragma warning(disable:26110) // unlock with holding lock +#endif + +#define mi_lock(lock) for(bool _go = (mi_lock_acquire(lock),true); _go; (mi_lock_release(lock), _go=false) ) + +#if defined(_WIN32) + +#if 1 +#define mi_lock_t SRWLOCK // slim reader-writer lock + +static inline bool mi_lock_try_acquire(mi_lock_t* lock) { + return TryAcquireSRWLockExclusive(lock); +} +static inline void mi_lock_acquire(mi_lock_t* lock) { + AcquireSRWLockExclusive(lock); +} +static inline void mi_lock_release(mi_lock_t* lock) { + ReleaseSRWLockExclusive(lock); +} +static inline void mi_lock_init(mi_lock_t* lock) { + InitializeSRWLock(lock); +} +static inline void mi_lock_done(mi_lock_t* lock) { + (void)(lock); +} + +#else +#define mi_lock_t CRITICAL_SECTION + +static inline bool mi_lock_try_acquire(mi_lock_t* lock) { + return TryEnterCriticalSection(lock); +} +static inline void mi_lock_acquire(mi_lock_t* lock) { + EnterCriticalSection(lock); +} +static inline void mi_lock_release(mi_lock_t* lock) { + LeaveCriticalSection(lock); +} +static inline void mi_lock_init(mi_lock_t* lock) { + InitializeCriticalSection(lock); +} +static inline void mi_lock_done(mi_lock_t* lock) { + DeleteCriticalSection(lock); +} + +#endif + +#elif defined(MI_USE_PTHREADS) + +#include <string.h> // memcpy +void _mi_error_message(int err, const char* fmt, ...); + +#define mi_lock_t pthread_mutex_t + +static inline bool mi_lock_try_acquire(mi_lock_t* lock) { + return (pthread_mutex_trylock(lock) == 0); +} +static inline void mi_lock_acquire(mi_lock_t* lock) { + const int err = pthread_mutex_lock(lock); + if (err != 0) { + _mi_error_message(err, "internal error: lock cannot be acquired (err %i)\n", err); + } +} +static inline void mi_lock_release(mi_lock_t* lock) { + pthread_mutex_unlock(lock); +} +static inline void mi_lock_init(mi_lock_t* lock) { + if(lock==NULL) return; + // use a static initializer instead of pthread_mutex_init since that can cause allocation on some platforms (and recursively initialize) + const mi_lock_t temp_lock = PTHREAD_MUTEX_INITIALIZER; + memcpy(lock,&temp_lock,sizeof(temp_lock)); +} +static inline void mi_lock_done(mi_lock_t* lock) { + pthread_mutex_destroy(lock); +} + +#elif defined(__cplusplus) + +#include <mutex> +#define mi_lock_t std::mutex + +static inline bool mi_lock_try_acquire(mi_lock_t* lock) { + return lock->try_lock(); +} +static inline void mi_lock_acquire(mi_lock_t* lock) { + lock->lock(); +} +static inline void mi_lock_release(mi_lock_t* lock) { + lock->unlock(); +} +static inline void mi_lock_init(mi_lock_t* lock) { + (void)(lock); +} +static inline void mi_lock_done(mi_lock_t* lock) { + (void)(lock); +} + +#else + +// fall back to poor man's locks. +// this should only be the case in a single-threaded environment (like __wasi__) + +#define mi_lock_t _Atomic(uintptr_t) + +static inline bool mi_lock_try_acquire(mi_lock_t* lock) { + uintptr_t expected = 0; + return mi_atomic_cas_strong_acq_rel(lock, &expected, (uintptr_t)1); +} +static inline void mi_lock_acquire(mi_lock_t* lock) { + for (int i = 0; i < 1000; i++) { // for at most 1000 tries?
+ if (mi_lock_try_acquire(lock)) return; + mi_atomic_yield(); + } +} +static inline void mi_lock_release(mi_lock_t* lock) { + mi_atomic_store_release(lock, (uintptr_t)0); +} +static inline void mi_lock_init(mi_lock_t* lock) { + mi_lock_release(lock); +} +static inline void mi_lock_done(mi_lock_t* lock) { + (void)(lock); +} + +#endif + + +#endif // MI_ATOMIC_H diff --git a/3rd/mimalloc-3.2.6/include/mimalloc/bits.h b/3rd/mimalloc-3.2.6/include/mimalloc/bits.h new file mode 100644 index 00000000..c40b32f6 --- /dev/null +++ b/3rd/mimalloc-3.2.6/include/mimalloc/bits.h @@ -0,0 +1,342 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2024 Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- + Bit operation, and platform dependent definition (MI_INTPTR_SIZE etc) +---------------------------------------------------------------------------- */ + +#pragma once +#ifndef MI_BITS_H +#define MI_BITS_H + +#include // size_t +#include // int64_t etc +#include // bool + +// ------------------------------------------------------ +// Size of a pointer. +// We assume that `sizeof(void*)==sizeof(intptr_t)` +// and it holds for all platforms we know of. +// +// However, the C standard only requires that: +// p == (void*)((intptr_t)p)) +// but we also need: +// i == (intptr_t)((void*)i) +// or otherwise one might define an intptr_t type that is larger than a pointer... +// ------------------------------------------------------ + +#if INTPTR_MAX > INT64_MAX +# define MI_INTPTR_SHIFT (4) // assume 128-bit (as on arm CHERI for example) +#elif INTPTR_MAX == INT64_MAX +# define MI_INTPTR_SHIFT (3) +#elif INTPTR_MAX == INT32_MAX +# define MI_INTPTR_SHIFT (2) +#else +#error platform pointers must be 32, 64, or 128 bits +#endif + +#if (INTPTR_MAX) > LONG_MAX +# define MI_PU(x) x##ULL +#else +# define MI_PU(x) x##UL +#endif + +#if SIZE_MAX == UINT64_MAX +# define MI_SIZE_SHIFT (3) +typedef int64_t mi_ssize_t; +#elif SIZE_MAX == UINT32_MAX +# define MI_SIZE_SHIFT (2) +typedef int32_t mi_ssize_t; +#else +#error platform objects must be 32 or 64 bits in size +#endif + +#if (SIZE_MAX/2) > LONG_MAX +# define MI_ZU(x) x##ULL +#else +# define MI_ZU(x) x##UL +#endif + +#define MI_INTPTR_SIZE (1< +#elif MI_ARCH_ARM64 && MI_OPT_SIMD +#include +#endif +#if defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) +#include +#endif + +#if MI_ARCH_X64 && defined(__AVX2__) && !defined(__BMI2__) // msvc +#define __BMI2__ 1 +#endif +#if MI_ARCH_X64 && (defined(__AVX2__) || defined(__BMI2__)) && !defined(__BMI1__) // msvc +#define __BMI1__ 1 +#endif + +// Define big endian if needed +// #define MI_BIG_ENDIAN 1 + +// maximum virtual address bits in a user-space pointer +#if MI_DEFAULT_VIRTUAL_ADDRESS_BITS > 0 +#define MI_MAX_VABITS MI_DEFAULT_VIRTUAL_ADDRESS_BITS +#elif MI_ARCH_X64 +#define MI_MAX_VABITS (47) +#elif MI_INTPTR_SIZE > 4 +#define MI_MAX_VABITS (48) +#else +#define MI_MAX_VABITS (32) +#endif + +// use a flat page-map (or a 2-level one) +#ifndef MI_PAGE_MAP_FLAT +#if MI_MAX_VABITS <= 40 && !defined(__APPLE__) +#define MI_PAGE_MAP_FLAT 1 +#else +#define MI_PAGE_MAP_FLAT 0 +#endif +#endif + + +/* 
-------------------------------------------------------------------------------- + Builtin's +-------------------------------------------------------------------------------- */ + +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + +#define mi_builtin(name) __builtin_##name +#define mi_has_builtin(name) __has_builtin(__builtin_##name) + +#if (LONG_MAX == INT32_MAX) +#define mi_builtin32(name) mi_builtin(name##l) +#define mi_has_builtin32(name) mi_has_builtin(name##l) +#else +#define mi_builtin32(name) mi_builtin(name) +#define mi_has_builtin32(name) mi_has_builtin(name) +#endif +#if (LONG_MAX == INT64_MAX) +#define mi_builtin64(name) mi_builtin(name##l) +#define mi_has_builtin64(name) mi_has_builtin(name##l) +#else +#define mi_builtin64(name) mi_builtin(name##ll) +#define mi_has_builtin64(name) mi_has_builtin(name##ll) +#endif + +#if (MI_SIZE_BITS == 32) +#define mi_builtinz(name) mi_builtin32(name) +#define mi_has_builtinz(name) mi_has_builtin32(name) +#define mi_msc_builtinz(name) name +#elif (MI_SIZE_BITS == 64) +#define mi_builtinz(name) mi_builtin64(name) +#define mi_has_builtinz(name) mi_has_builtin64(name) +#define mi_msc_builtinz(name) name##64 +#endif + +/* -------------------------------------------------------------------------------- + Popcount and count trailing/leading zero's +-------------------------------------------------------------------------------- */ + +size_t _mi_popcount_generic(size_t x); + +static inline size_t mi_popcount(size_t x) { + #if mi_has_builtinz(popcount) + return mi_builtinz(popcount)(x); + #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) + return mi_msc_builtinz(__popcnt)(x); + #elif MI_ARCH_X64 && defined(__BMI1__) + return (size_t)_mm_popcnt_u64(x); + #else + #define MI_HAS_FAST_POPCOUNT 0 + return (x<=1 ? x : _mi_popcount_generic(x)); + #endif +} + +#ifndef MI_HAS_FAST_POPCOUNT +#define MI_HAS_FAST_POPCOUNT 1 +#endif + + + +size_t _mi_clz_generic(size_t x); +size_t _mi_ctz_generic(size_t x); + +static inline size_t mi_ctz(size_t x) { + #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) // on x64 tzcnt is defined for 0 + size_t r; + __asm ("tzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc"); + return r; + #elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__) + return _tzcnt_u64(x); + #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) + unsigned long idx; + return (mi_msc_builtinz(_BitScanForward)(&idx, x) ? (size_t)idx : MI_SIZE_BITS); + #elif mi_has_builtinz(ctz) + return (x!=0 ? (size_t)mi_builtinz(ctz)(x) : MI_SIZE_BITS); + #elif defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86) + size_t r = MI_SIZE_BITS; // bsf leaves destination unmodified if the argument is 0 (see ) + __asm ("bsf\t%1, %0" : "+r"(r) : "r"(x) : "cc"); + return r; + #elif MI_HAS_FAST_POPCOUNT + return (x!=0 ? (mi_popcount(x^(x-1))-1) : MI_SIZE_BITS); + #else + #define MI_HAS_FAST_BITSCAN 0 + return (x!=0 ? _mi_ctz_generic(x) : MI_SIZE_BITS); + #endif +} + +static inline size_t mi_clz(size_t x) { + #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) // on x64 lzcnt is defined for 0 + size_t r; + __asm ("lzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc"); + return r; + #elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__) + return _lzcnt_u64(x); + #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) + unsigned long idx; + return (mi_msc_builtinz(_BitScanReverse)(&idx, x) ? 
MI_SIZE_BITS - 1 - (size_t)idx : MI_SIZE_BITS); + #elif mi_has_builtinz(clz) + return (x!=0 ? (size_t)mi_builtinz(clz)(x) : MI_SIZE_BITS); + #elif defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86) + if (x==0) return MI_SIZE_BITS; + size_t r; + __asm ("bsr\t%1, %0" : "=r"(r) : "r"(x) : "cc"); + return (MI_SIZE_BITS - 1 - r); + #else + #define MI_HAS_FAST_BITSCAN 0 + return (x!=0 ? _mi_clz_generic(x) : MI_SIZE_BITS); + #endif +} + +#ifndef MI_HAS_FAST_BITSCAN +#define MI_HAS_FAST_BITSCAN 1 +#endif + +/* -------------------------------------------------------------------------------- + find trailing/leading zero (bit scan forward/reverse) +-------------------------------------------------------------------------------- */ + +// Bit scan forward: find the least significant bit that is set (i.e. count trailing zero's) +// return false if `x==0` (with `*idx` undefined) and true otherwise, +// with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`). +static inline bool mi_bsf(size_t x, size_t* idx) { + #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) && (!defined(__clang_major__) || __clang_major__ >= 9) + // on x64 the carry flag is set on zero which gives better codegen + bool is_zero; + __asm ( "tzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc" ); + return !is_zero; + #elif 0 && defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) + unsigned long i; + return (mi_msc_builtinz(_BitScanForward)(&i, x) ? (*idx = (size_t)i, true) : false); + #else + return (x!=0 ? (*idx = mi_ctz(x), true) : false); + #endif +} + +// Bit scan reverse: find the most significant bit that is set +// return false if `x==0` (with `*idx` undefined) and true otherwise, +// with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`). +static inline bool mi_bsr(size_t x, size_t* idx) { + #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) && (!defined(__clang_major__) || __clang_major__ >= 9) + // on x64 the carry flag is set on zero which gives better codegen + bool is_zero; + __asm ("lzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc"); + return !is_zero; + #elif 0 && defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) + unsigned long i; + return (mi_msc_builtinz(_BitScanReverse)(&i, x) ? (*idx = (size_t)i, true) : false); + #else + return (x!=0 ? (*idx = MI_SIZE_BITS - 1 - mi_clz(x), true) : false); + #endif +} + + +/* -------------------------------------------------------------------------------- + rotate +-------------------------------------------------------------------------------- */ + +static inline size_t mi_rotr(size_t x, size_t r) { + #if (mi_has_builtin(rotateright64) && MI_SIZE_BITS==64) + return mi_builtin(rotateright64)(x,r); + #elif (mi_has_builtin(rotateright32) && MI_SIZE_BITS==32) + return mi_builtin(rotateright32)(x,r); + #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_ARM64) + return _rotr64(x, (int)r); + #elif defined(_MSC_VER) && (MI_ARCH_X86 || MI_ARCH_ARM32) + return _lrotr(x,(int)r); + #else + // The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to + // avoid UB when `rshift==0`. 
See + const unsigned int rshift = (unsigned int)(r) & (MI_SIZE_BITS-1); + return ((x >> rshift) | (x << ((-rshift) & (MI_SIZE_BITS-1)))); + #endif +} + +static inline size_t mi_rotl(size_t x, size_t r) { + #if (mi_has_builtin(rotateleft64) && MI_SIZE_BITS==64) + return mi_builtin(rotateleft64)(x,r); + #elif (mi_has_builtin(rotateleft32) && MI_SIZE_BITS==32) + return mi_builtin(rotateleft32)(x,r); + #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_ARM64) + return _rotl64(x, (int)r); + #elif defined(_MSC_VER) && (MI_ARCH_X86 || MI_ARCH_ARM32) + return _lrotl(x, (int)r); + #else + // The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to + // avoid UB when `rshift==0`. See + const unsigned int rshift = (unsigned int)(r) & (MI_SIZE_BITS-1); + return ((x << rshift) | (x >> ((-rshift) & (MI_SIZE_BITS-1)))); + #endif +} + +static inline uint32_t mi_rotl32(uint32_t x, uint32_t r) { + #if mi_has_builtin(rotateleft32) + return mi_builtin(rotateleft32)(x,r); + #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) + return _lrotl(x, (int)r); + #else + // The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to + // avoid UB when `rshift==0`. See + const unsigned int rshift = (unsigned int)(r) & 31; + return ((x << rshift) | (x >> ((-rshift) & 31))); + #endif +} + + +#endif // MI_BITS_H diff --git a/3rd/mimalloc-3.2.6/include/mimalloc/internal.h b/3rd/mimalloc-3.2.6/include/mimalloc/internal.h new file mode 100644 index 00000000..f1087d1b --- /dev/null +++ b/3rd/mimalloc-3.2.6/include/mimalloc/internal.h @@ -0,0 +1,1287 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MI_INTERNAL_H +#define MI_INTERNAL_H + +// -------------------------------------------------------------------------- +// This file contains the internal API's of mimalloc and various utility +// functions and macros. +// -------------------------------------------------------------------------- + +#include "types.h" +#include "track.h" +#include "bits.h" + + +// -------------------------------------------------------------------------- +// Compiler defines +// -------------------------------------------------------------------------- + +#if (MI_DEBUG>0) +#define mi_trace_message(...) _mi_trace_message(__VA_ARGS__) +#else +#define mi_trace_message(...) 
+#endif + +#define mi_decl_cache_align mi_decl_align(64) + +#if defined(_MSC_VER) +#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) +#pragma warning(disable:26812) // unscoped enum warning +#define mi_decl_forceinline __forceinline +#define mi_decl_noinline __declspec(noinline) +#define mi_decl_thread __declspec(thread) +#define mi_decl_noreturn __declspec(noreturn) +#define mi_decl_weak +#define mi_decl_hidden +#define mi_decl_cold +#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc +#if !MI_TRACK_ASAN +#define mi_decl_forceinline __attribute__((always_inline)) +#else +#define mi_decl_forceinline inline +#endif +#define mi_decl_noinline __attribute__((noinline)) +#define mi_decl_thread __thread +#define mi_decl_noreturn __attribute__((noreturn)) +#define mi_decl_weak __attribute__((weak)) +#define mi_decl_hidden __attribute__((visibility("hidden"))) +#if (__GNUC__ >= 4) || defined(__clang__) +#define mi_decl_cold __attribute__((cold)) +#else +#define mi_decl_cold +#endif +#elif __cplusplus >= 201103L // c++11 +#define mi_decl_forceinline inline +#define mi_decl_noinline +#define mi_decl_thread thread_local +#define mi_decl_noreturn [[noreturn]] +#define mi_decl_weak +#define mi_decl_hidden +#define mi_decl_cold +#else +#define mi_decl_forceinline inline +#define mi_decl_noinline +#define mi_decl_thread __thread // hope for the best :-) +#define mi_decl_noreturn +#define mi_decl_weak +#define mi_decl_hidden +#define mi_decl_cold +#endif + +#if defined(__GNUC__) || defined(__clang__) +#define mi_unlikely(x) (__builtin_expect(!!(x),false)) +#define mi_likely(x) (__builtin_expect(!!(x),true)) +#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) +#define mi_unlikely(x) (x) [[unlikely]] +#define mi_likely(x) (x) [[likely]] +#else +#define mi_unlikely(x) (x) +#define mi_likely(x) (x) +#endif + +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + +#if defined(__cplusplus) +#define mi_decl_externc extern "C" +#else +#define mi_decl_externc +#endif + +#if (defined(__GNUC__) && (__GNUC__ >= 7)) || defined(__clang__) // includes clang and icc +#define mi_decl_maybe_unused __attribute__((unused)) +#elif __cplusplus >= 201703L // c++17 +#define mi_decl_maybe_unused [[maybe_unused]] +#else +#define mi_decl_maybe_unused +#endif + +#if defined(__cplusplus) +#define mi_decl_externc extern "C" +#else +#define mi_decl_externc +#endif + + +#if defined(__EMSCRIPTEN__) && !defined(__wasi__) +#define __wasi__ +#endif + + +// -------------------------------------------------------------------------- +// Internal functions +// -------------------------------------------------------------------------- + + +// "libc.c" +#include +int _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args); +int _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...); +char _mi_toupper(char c); +int _mi_strnicmp(const char* s, const char* t, size_t n); +void _mi_strlcpy(char* dest, const char* src, size_t dest_size); +void _mi_strlcat(char* dest, const char* src, size_t dest_size); +size_t _mi_strlen(const char* s); +size_t _mi_strnlen(const char* s, size_t max_len); +char* _mi_strnstr(char* s, size_t max_len, const char* pat); +bool _mi_getenv(const char* name, char* result, size_t result_size); + +// "options.c" +void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message); +void _mi_fprintf(mi_output_fun* out, void* arg, const char* 
fmt, ...); +void _mi_raw_message(const char* fmt, ...); +void _mi_message(const char* fmt, ...); +void _mi_warning_message(const char* fmt, ...); +void _mi_verbose_message(const char* fmt, ...); +void _mi_trace_message(const char* fmt, ...); +void _mi_options_init(void); +void _mi_options_post_init(void); +long _mi_option_get_fast(mi_option_t option); +void _mi_error_message(int err, const char* fmt, ...); + +// random.c +void _mi_random_init(mi_random_ctx_t* ctx); +void _mi_random_init_weak(mi_random_ctx_t* ctx); +void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx); +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx); +uintptr_t _mi_random_next(mi_random_ctx_t* ctx); +uintptr_t _mi_theap_random_next(mi_theap_t* theap); +uintptr_t _mi_os_random_weak(uintptr_t extra_seed); +static inline uintptr_t _mi_random_shuffle(uintptr_t x); + +// init.c +extern mi_decl_hidden mi_decl_cache_align const mi_page_t _mi_page_empty; +void _mi_auto_process_init(void); +void mi_cdecl _mi_auto_process_done(void) mi_attr_noexcept; +bool _mi_is_redirected(void); +bool _mi_allocator_init(const char** message); +void _mi_allocator_done(void); +bool _mi_is_main_thread(void); +bool _mi_preloading(void); // true while the C runtime is not initialized yet +void _mi_thread_done(mi_theap_t* theap); + +mi_subproc_t* _mi_subproc(void); +mi_heap_t* _mi_subproc_heap_main(mi_subproc_t* subproc); +mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id); + +mi_threadid_t _mi_thread_id(void) mi_attr_noexcept; +size_t _mi_thread_seq_id(void) mi_attr_noexcept; +bool _mi_is_heap_main(const mi_heap_t* heap); +void _mi_theap_guarded_init(mi_theap_t* theap); +void _mi_theap_options_init(mi_theap_t* theap); +mi_theap_t* _mi_theap_default_safe(void); // ensure the returned theap is initialized + +// os.c +void _mi_os_init(void); // called from process init +void* _mi_os_alloc(size_t size, mi_memid_t* memid); +void* _mi_os_zalloc(size_t size, mi_memid_t* memid); +void _mi_os_free(void* p, size_t size, mi_memid_t memid); +void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid, mi_subproc_t* subproc ); + +size_t _mi_os_page_size(void); +size_t _mi_os_guard_page_size(void); +size_t _mi_os_good_alloc_size(size_t size); +bool _mi_os_has_overcommit(void); +bool _mi_os_has_virtual_reserve(void); +size_t _mi_os_virtual_address_bits(void); +size_t _mi_os_minimal_purge_size(void); + +bool _mi_os_reset(void* addr, size_t size); +bool _mi_os_decommit(void* addr, size_t size); +void _mi_os_reuse(void* p, size_t size); +mi_decl_nodiscard bool _mi_os_commit(void* p, size_t size, bool* is_zero); +mi_decl_nodiscard bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); +mi_decl_nodiscard bool _mi_os_protect(void* addr, size_t size); +bool _mi_os_unprotect(void* addr, size_t size); +bool _mi_os_purge(void* p, size_t size); +bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stats_size, mi_commit_fun_t* commit_fun, void* commit_fun_arg); + +size_t _mi_os_secure_guard_page_size(void); +bool _mi_os_secure_guard_page_set_at(void* addr, mi_memid_t memid); +bool _mi_os_secure_guard_page_set_before(void* addr, mi_memid_t memid); +bool _mi_os_secure_guard_page_reset_at(void* addr, mi_memid_t memid); +bool _mi_os_secure_guard_page_reset_before(void* addr, mi_memid_t memid); + +int _mi_os_numa_node(void); +int _mi_os_numa_node_count(void); + +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid); +void* 
_mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid); + +void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size); +bool _mi_os_canuse_large_page(size_t size, size_t alignment); +size_t _mi_os_large_page_size(void); +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid); + +// threadlocal.c + +mi_thread_local_t _mi_thread_local_create(void); +void _mi_thread_local_free( mi_thread_local_t key ); +bool _mi_thread_local_set( mi_thread_local_t key, void* val ); +void* _mi_thread_local_get( mi_thread_local_t key ); +void _mi_thread_locals_init(void); +void _mi_thread_locals_done(void); +void _mi_thread_locals_thread_done(void); + +// arena.c +mi_arena_id_t _mi_arena_id_none(void); +mi_arena_t* _mi_arena_from_id(mi_arena_id_t id); +bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_t* request_arena); + +void* _mi_arenas_alloc(mi_heap_t* heap, size_t size, bool commit, bool allow_pinned, mi_arena_t* req_arena, size_t tseq, int numa_node, mi_memid_t* memid); +void* _mi_arenas_alloc_aligned(mi_heap_t* heap, size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_pinned, mi_arena_t* req_arena, size_t tseq, int numa_node, mi_memid_t* memid); +void _mi_arenas_free(void* p, size_t size, mi_memid_t memid); +bool _mi_arenas_contain(const void* p); +void _mi_arenas_collect(bool force_purge, bool visit_all, mi_tld_t* tld); +void _mi_arenas_unsafe_destroy_all(mi_subproc_t* subproc); + +mi_page_t* _mi_arenas_page_alloc(mi_theap_t* theap, size_t block_size, size_t page_alignment); +void _mi_arenas_page_free(mi_page_t* page, mi_theap_t* current_theapx /* can be NULL */); +void _mi_arenas_page_abandon(mi_page_t* page, mi_theap_t* current_theap); +void _mi_arenas_page_unabandon(mi_page_t* page, mi_theap_t* current_theapx /* can be NULL */); +bool _mi_arenas_page_try_reabandon_to_mapped(mi_page_t* page); + +// arena-meta.c +void* _mi_meta_zalloc( size_t size, mi_memid_t* memid ); +void _mi_meta_free(void* p, size_t size, mi_memid_t memid); +bool _mi_meta_is_meta_page(void* p); + +// "page-map.c" +bool _mi_page_map_init(void); +mi_decl_nodiscard bool _mi_page_map_register(mi_page_t* page); +void _mi_page_map_unregister(mi_page_t* page); +void _mi_page_map_unregister_range(void* start, size_t size); +mi_page_t* _mi_safe_ptr_page(const void* p); +void _mi_page_map_unsafe_destroy(mi_subproc_t* subproc); + +// "page.c" +void* _mi_malloc_generic(mi_theap_t* theap, size_t size, size_t zero_huge_alignment, size_t* usable) mi_attr_noexcept mi_attr_malloc; + +void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks +void _mi_page_unfull(mi_page_t* page); +void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq); // free the page +void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread... 
+ +size_t _mi_page_queue_append(mi_theap_t* theap, mi_page_queue_t* pq, mi_page_queue_t* append); +void _mi_deferred_free(mi_theap_t* theap, bool force); + +void _mi_page_free_collect(mi_page_t* page, bool force); +void _mi_page_free_collect_partly(mi_page_t* page, mi_block_t* head); +mi_decl_nodiscard bool _mi_page_init(mi_theap_t* theap, mi_page_t* page); +bool _mi_page_queue_is_valid(mi_theap_t* theap, const mi_page_queue_t* pq); + +size_t _mi_page_stats_bin(const mi_page_t* page); // for stats +size_t _mi_bin_size(size_t bin); // for stats +size_t _mi_bin(size_t size); // for stats + +// "theap.c" +mi_theap_t* _mi_theap_create(mi_heap_t* heap, mi_tld_t* tld); +void _mi_theap_delete(mi_theap_t* theap); +void _mi_theap_default_set(mi_theap_t* theap); +void _mi_theap_cached_set(mi_theap_t* theap); +void _mi_theap_collect_retired(mi_theap_t* theap, bool force); +bool _mi_theap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_block_visit_fun* visitor, void* arg); +void _mi_theap_page_reclaim(mi_theap_t* theap, mi_page_t* page); +void _mi_theap_free(mi_theap_t* theap); + +// "heap.c" +void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page); +mi_decl_cold mi_theap_t* _mi_heap_theap_get_or_init(const mi_heap_t* heap); // get (and possible create) the theap belonging to a heap +mi_decl_cold mi_theap_t* _mi_heap_theap_get_peek(const mi_heap_t* heap); // get the theap for a heap without initializing (and return NULL in that case) +void _mi_heap_move_pages(mi_heap_t* heap_from, mi_heap_t* heap_to); // in "arena.c" +void _mi_heap_destroy_pages(mi_heap_t* heap_from); // in "arena.c" + + +// "stats.c" +void _mi_stats_init(void); +void _mi_stats_merge_into(mi_stats_t* to, mi_stats_t* from); + +mi_msecs_t _mi_clock_now(void); +mi_msecs_t _mi_clock_end(mi_msecs_t start); +mi_msecs_t _mi_clock_start(void); + +// "alloc.c" +void* _mi_page_malloc(mi_theap_t* theap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_theap_malloc_aligned` +void* _mi_page_malloc_zeroed(mi_theap_t* theap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_theap_malloc_aligned` +void* _mi_theap_malloc_zero(mi_theap_t* theap, size_t size, bool zero, size_t* usable) mi_attr_noexcept; +void* _mi_theap_malloc_zero_ex(mi_theap_t* theap, size_t size, bool zero, size_t huge_alignment, size_t* usable) mi_attr_noexcept; // called from `_mi_theap_malloc_aligned` +void* _mi_theap_realloc_zero(mi_theap_t* theap, void* p, size_t newsize, bool zero, size_t* usable_pre, size_t* usable_post) mi_attr_noexcept; +mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p); +void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size); + +#if MI_DEBUG>1 +bool _mi_page_is_valid(mi_page_t* page); +#endif + + +// ------------------------------------------------------ +// Assertions +// ------------------------------------------------------ + +#if (MI_DEBUG) +// use our own assertion to print without memory allocation +mi_decl_noreturn mi_decl_cold void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func) mi_attr_noexcept; +#define mi_assert(expr) ((expr) ? 
(void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__)) +#else +#define mi_assert(x) +#endif + +#if (MI_DEBUG>1) +#define mi_assert_internal mi_assert +#else +#define mi_assert_internal(x) +#endif + +#if (MI_DEBUG>2) +#define mi_assert_expensive mi_assert +#else +#define mi_assert_expensive(x) +#endif + + +/* ----------------------------------------------------------- + Statistics (in `stats.c`) +----------------------------------------------------------- */ + +// add to stat keeping track of the peak +void __mi_stat_increase(mi_stat_count_t* stat, size_t amount); +void __mi_stat_decrease(mi_stat_count_t* stat, size_t amount); +void __mi_stat_increase_mt(mi_stat_count_t* stat, size_t amount); +void __mi_stat_decrease_mt(mi_stat_count_t* stat, size_t amount); + +// adjust stat in special cases to compensate for double counting (and does not adjust peak values and can decrease the total) +void __mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount); +void __mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount); +void __mi_stat_adjust_increase_mt(mi_stat_count_t* stat, size_t amount); +void __mi_stat_adjust_decrease_mt(mi_stat_count_t* stat, size_t amount); + +// counters can just be increased +void __mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); +void __mi_stat_counter_increase_mt(mi_stat_counter_t* stat, size_t amount); + +#define mi_heap_stat_counter_increase(heap,stat,amount) __mi_stat_counter_increase_mt( &(heap)->stats.stat, amount) +#define mi_heap_stat_increase(heap,stat,amount) __mi_stat_increase_mt( &(heap)->stats.stat, amount) +#define mi_heap_stat_decrease(heap,stat,amount) __mi_stat_decrease_mt( &(heap)->stats.stat, amount) +#define mi_heap_stat_adjust_increase(heap,stat,amnt) __mi_stat_adjust_increase_mt( &(heap)->stats.stat, amnt) +#define mi_heap_stat_adjust_decrease(heap,stat,amnt) __mi_stat_adjust_decrease_mt( &(heap)->stats.stat, amnt) + +#define mi_subproc_stat_counter_increase(subproc,stat,amount) __mi_stat_counter_increase_mt( &(subproc)->stats.stat, amount) +#define mi_subproc_stat_increase(subproc,stat,amount) __mi_stat_increase_mt( &(subproc)->stats.stat, amount) +#define mi_subproc_stat_decrease(subproc,stat,amount) __mi_stat_decrease_mt( &(subproc)->stats.stat, amount) +#define mi_subproc_stat_adjust_increase(subproc,stat,amount) __mi_stat_adjust_increase_mt( &(subproc)->stats.stat, amount) +#define mi_subproc_stat_adjust_decrease(subproc,stat,amount) __mi_stat_adjust_decrease_mt( &(subproc)->stats.stat, amount) + +#define mi_os_stat_counter_increase(stat,amount) mi_subproc_stat_counter_increase(_mi_subproc(),stat,amount) +#define mi_os_stat_increase(stat,amount) mi_subproc_stat_increase(_mi_subproc(),stat,amount) +#define mi_os_stat_decrease(stat,amount) mi_subproc_stat_decrease(_mi_subproc(),stat,amount) + +#define mi_theap_stat_counter_increase(theap,stat,amount) __mi_stat_counter_increase( &(theap)->stats.stat, amount) +#define mi_theap_stat_increase(theap,stat,amount) __mi_stat_increase( &(theap)->stats.stat, amount) +#define mi_theap_stat_decrease(theap,stat,amount) __mi_stat_decrease( &(theap)->stats.stat, amount) +#define mi_theap_stat_adjust_increase(theap,stat,amnt) __mi_stat_adjust_increase( &(theap)->stats.stat, amnt) +#define mi_theap_stat_adjust_decrease(theap,stat,amnt) __mi_stat_adjust_decrease( &(theap)->stats.stat, amnt) + + +/* ----------------------------------------------------------- + Options (exposed for the debugger) +----------------------------------------------------------- */ +typedef enum 
mi_option_init_e { + MI_OPTION_UNINIT, // not yet initialized + MI_OPTION_DEFAULTED, // not found in the environment, use default value + MI_OPTION_INITIALIZED // found in environment or set explicitly +} mi_option_init_t; + +typedef struct mi_option_desc_s { + long value; // the value + mi_option_init_t init; // is it initialized yet? (from the environment) + mi_option_t option; // for debugging: the option index should match the option + const char* name; // option name without `mimalloc_` prefix + const char* legacy_name; // potential legacy option name +} mi_option_desc_t; + + + +/* ----------------------------------------------------------- + Inlined definitions +----------------------------------------------------------- */ +#define MI_UNUSED(x) (void)(x) +#if (MI_DEBUG>0) +#define MI_UNUSED_RELEASE(x) +#else +#define MI_UNUSED_RELEASE(x) MI_UNUSED(x) +#endif + +#define MI_INIT4(x) x(),x(),x(),x() +#define MI_INIT8(x) MI_INIT4(x),MI_INIT4(x) +#define MI_INIT16(x) MI_INIT8(x),MI_INIT8(x) +#define MI_INIT32(x) MI_INIT16(x),MI_INIT16(x) +#define MI_INIT64(x) MI_INIT32(x),MI_INIT32(x) +#define MI_INIT128(x) MI_INIT64(x),MI_INIT64(x) +#define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x) + +#define MI_INIT74(x) MI_INIT64(x),MI_INIT8(x),x(),x() +#define MI_INIT5(x) MI_INIT4(x),x() + +#include +// initialize a local variable to zero; use memset as compilers optimize constant sized memset's +#define _mi_memzero_var(x) memset(&x,0,sizeof(x)) + +// Is `x` a power of two? (0 is considered a power of two) +static inline bool _mi_is_power_of_two(uintptr_t x) { + return ((x & (x - 1)) == 0); +} + +// Is a pointer aligned? +static inline bool _mi_is_aligned(void* p, size_t alignment) { + mi_assert_internal(alignment != 0); + return (((uintptr_t)p % alignment) == 0); +} + +// Align upwards +static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { + mi_assert_internal(alignment != 0); + uintptr_t mask = alignment - 1; + if ((alignment & mask) == 0) { // power of two? + return ((sz + mask) & ~mask); + } + else { + return (((sz + mask)/alignment)*alignment); + } +} + + +// Align a pointer upwards +static inline uint8_t* _mi_align_up_ptr(void* p, size_t alignment) { + return (uint8_t*)_mi_align_up((uintptr_t)p, alignment); +} + + +static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) { + mi_assert_internal(alignment != 0); + uintptr_t mask = alignment - 1; + if ((alignment & mask) == 0) { // power of two? + return (sz & ~mask); + } + else { + return ((sz / alignment) * alignment); + } +} + +static inline void* mi_align_down_ptr(void* p, size_t alignment) { + return (void*)_mi_align_down((uintptr_t)p, alignment); +} + +// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`. +static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) { + mi_assert_internal(divider != 0); + return (divider == 0 ? size : ((size + divider - 1) / divider)); +} + + +// clamp an integer +static inline size_t _mi_clamp(size_t sz, size_t min, size_t max) { + if (sz < min) return min; + else if (sz > max) return max; + else return sz; +} + +// Is memory zero initialized? +static inline bool mi_mem_is_zero(const void* p, size_t size) { + for (size_t i = 0; i < size; i++) { + if (((uint8_t*)p)[i] != 0) return false; + } + return true; +} + +// Align a byte size to a size in _machine words_, +// i.e. byte size == `wsize*sizeof(void*)`. 
+static inline size_t _mi_wsize_from_size(size_t size) { + mi_assert_internal(size <= SIZE_MAX - sizeof(uintptr_t)); + return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); +} + +// Overflow detecting multiply +#if __has_builtin(__builtin_umul_overflow) || (defined(__GNUC__) && (__GNUC__ >= 5)) +#include // UINT_MAX, ULONG_MAX +#if defined(_CLOCK_T) // for Illumos +#undef _CLOCK_T +#endif +static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { + #if (SIZE_MAX == ULONG_MAX) + return __builtin_umull_overflow(count, size, (unsigned long *)total); + #elif (SIZE_MAX == UINT_MAX) + return __builtin_umul_overflow(count, size, (unsigned int *)total); + #else + return __builtin_umulll_overflow(count, size, (unsigned long long *)total); + #endif +} +#else /* __builtin_umul_overflow is unavailable */ +static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { + *total = count*size; + if mi_likely(((size|count)>>(4*MI_SIZE_SIZE))==0) { // did size and count fit both in the lower half bits of a size_t? + return false; + } + else { + return (size!=0 && (SIZE_MAX / size) < count); + } +} +#endif + +// Safe multiply `count*size` into `total`; return `true` on overflow. +static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* total) { + if (count==1) { // quick check for the case where count is one (common for C++ allocators) + *total = size; + return false; + } + else if mi_likely(!mi_mul_overflow(count, size, total)) { + return false; + } + else { + #if MI_DEBUG > 0 + _mi_error_message(EOVERFLOW, "allocation request is too large (%zu * %zu bytes)\n", count, size); + #endif + *total = SIZE_MAX; + return true; + } +} + + +/*---------------------------------------------------------------------------------------- + Heap functions +------------------------------------------------------------------------------------------- */ + +extern mi_decl_hidden const mi_theap_t _mi_theap_empty; // read-only empty theap, initial value of the thread local default theap (in the MI_TLS_MODEL_THREAD_LOCAL) +extern mi_decl_hidden const mi_theap_t _mi_theap_empty_wrong; // read-only empty theap used to signal that a theap for a heap could not be allocated + +static inline bool mi_theap_is_initialized(const mi_theap_t* theap) { + return (theap != NULL && theap->heap != NULL); +} + +static inline mi_page_t* _mi_theap_get_free_small_page(mi_theap_t* theap, size_t size) { + mi_assert_internal(size <= (MI_SMALL_SIZE_MAX + MI_PADDING_SIZE)); + const size_t idx = _mi_wsize_from_size(size); + mi_assert_internal(idx < MI_PAGES_DIRECT); + return theap->pages_free_direct[idx]; +} + + +//static inline uintptr_t _mi_ptr_cookie(const void* p) { +// extern mi_theap_t _mi_theap_main; +// mi_assert_internal(_mi_theap_main.cookie != 0); +// return ((uintptr_t)p ^ _mi_theap_main.cookie); +//} + + +/* ----------------------------------------------------------- + The page map maps addresses to `mi_page_t` pointers +----------------------------------------------------------- */ + +#if MI_PAGE_MAP_FLAT + +// flat page-map committed on demand, using one byte per slice (64 KiB). +// single indirection and low commit, but large initial virtual reserve (4 GiB with 48 bit virtual addresses) +// used by default on <= 40 bit virtual address spaces. 
+extern mi_decl_hidden uint8_t* _mi_page_map; + +static inline size_t _mi_page_map_index(const void* p) { + return (size_t)((uintptr_t)p >> MI_ARENA_SLICE_SHIFT); +} + +static inline mi_page_t* _mi_ptr_page_ex(const void* p, bool* valid) { + const size_t idx = _mi_page_map_index(p); + const size_t ofs = _mi_page_map[idx]; + if (valid != NULL) { *valid = (ofs != 0); } + return (mi_page_t*)((((uintptr_t)p >> MI_ARENA_SLICE_SHIFT) + 1 - ofs) << MI_ARENA_SLICE_SHIFT); +} + +static inline mi_page_t* _mi_checked_ptr_page(const void* p) { + bool valid; + mi_page_t* const page = _mi_ptr_page_ex(p, &valid); + return (valid ? page : NULL); +} + +static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) { + return _mi_ptr_page_ex(p, NULL); +} + +#else + +// 2-level page map: +// double indirection, but low commit and low virtual reserve. +// +// the page-map is usually 4 MiB (for 48 bit virtual addresses) and points to sub maps of 64 KiB. +// the page-map is committed on-demand (in 64 KiB parts) (and sub-maps are committed on-demand as well) +// one sub page-map = 64 KiB => covers 2^(16-3) * 2^16 = 2^29 = 512 MiB address space +// the page-map needs 48-(16+13) = 19 bits => 2^19 sub map pointers = 2^22 bytes = 4 MiB reserved size. +#define MI_PAGE_MAP_SUB_SHIFT (13) +#define MI_PAGE_MAP_SUB_COUNT (MI_ZU(1) << MI_PAGE_MAP_SUB_SHIFT) +#define MI_PAGE_MAP_SHIFT (MI_MAX_VABITS - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT) +#define MI_PAGE_MAP_COUNT (MI_ZU(1) << MI_PAGE_MAP_SHIFT) + +typedef mi_page_t** mi_submap_t; +extern mi_decl_hidden _Atomic(mi_submap_t)* _mi_page_map; + +static inline size_t _mi_page_map_index(const void* p, size_t* sub_idx) { + const size_t u = (size_t)((uintptr_t)p / MI_ARENA_SLICE_SIZE); + if (sub_idx != NULL) { *sub_idx = u % MI_PAGE_MAP_SUB_COUNT; } + return (u / MI_PAGE_MAP_SUB_COUNT); +} + +static inline mi_submap_t _mi_page_map_at(size_t idx) { + return mi_atomic_load_ptr_relaxed(mi_page_t*, &_mi_page_map[idx]); +} + +static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) { + size_t sub_idx; + const size_t idx = _mi_page_map_index(p, &sub_idx); + return (_mi_page_map_at(idx))[sub_idx]; // NULL if p==NULL +} + +static inline mi_page_t* _mi_checked_ptr_page(const void* p) { + size_t sub_idx; + const size_t idx = _mi_page_map_index(p, &sub_idx); + mi_submap_t const sub = _mi_page_map_at(idx); + if mi_unlikely(sub == NULL) return NULL; + return sub[sub_idx]; +} + +#endif + + +static inline mi_page_t* _mi_ptr_page(const void* p) { + mi_assert_internal(p==NULL || mi_is_in_heap_region(p)); + #if MI_DEBUG || MI_SECURE || defined(__APPLE__) + return _mi_checked_ptr_page(p); + #else + return _mi_unchecked_ptr_page(p); + #endif +} + + +// Get the block size of a page +static inline size_t mi_page_block_size(const mi_page_t* page) { + mi_assert_internal(page->block_size > 0); + return page->block_size; +} + +// Page start +static inline uint8_t* mi_page_start(const mi_page_t* page) { + return page->page_start; +} + +static inline size_t mi_page_size(const mi_page_t* page) { + return mi_page_block_size(page) * page->reserved; +} + +static inline uint8_t* mi_page_area(const mi_page_t* page, size_t* size) { + if (size) { *size = mi_page_size(page); } + return mi_page_start(page); +} + +static inline size_t mi_page_info_size(void) { + return _mi_align_up(sizeof(mi_page_t), MI_MAX_ALIGN_SIZE); +} + +static inline bool mi_page_contains_address(const mi_page_t* page, const void* p) { + size_t psize; + uint8_t* start = mi_page_area(page, &psize); + return (start <= (uint8_t*)p && 
(uint8_t*)p < start + psize); +} + +static inline bool mi_page_is_in_arena(const mi_page_t* page) { + return (page->memid.memkind == MI_MEM_ARENA); +} + +static inline bool mi_page_is_singleton(const mi_page_t* page) { + return (page->reserved == 1); +} + +// Get the usable block size of a page without fixed padding. +// This may still include internal padding due to alignment and rounding up size classes. +static inline size_t mi_page_usable_block_size(const mi_page_t* page) { + return mi_page_block_size(page) - MI_PADDING_SIZE; +} + +// This may change if we locate page info outside the page data slices +static inline uint8_t* mi_page_slice_start(const mi_page_t* page) { + return (uint8_t*)page; +} + +// This gives the offset relative to the start slice of a page. This may change if we ever +// locate page info outside the page-data itself. +static inline size_t mi_page_slice_offset_of(const mi_page_t* page, size_t offset_relative_to_page_start) { + return (page->page_start - mi_page_slice_start(page)) + offset_relative_to_page_start; +} + +static inline size_t mi_page_committed(const mi_page_t* page) { + return (page->slice_committed == 0 ? mi_page_size(page) : page->slice_committed - (page->page_start - mi_page_slice_start(page))); +} + +// are all blocks in a page freed? +// note: needs up-to-date used count, (as the `xthread_free` list may not be empty). see `_mi_page_collect_free`. +static inline bool mi_page_all_free(const mi_page_t* page) { + mi_assert_internal(page != NULL); + return (page->used == 0); +} + +// are there immediately available blocks, i.e. blocks available on the free list. +static inline bool mi_page_immediate_available(const mi_page_t* page) { + mi_assert_internal(page != NULL); + return (page->free != NULL); +} + + +// is the page not yet used up to its reserved space? +static inline bool mi_page_is_expandable(const mi_page_t* page) { + mi_assert_internal(page != NULL); + mi_assert_internal(page->capacity <= page->reserved); + return (page->capacity < page->reserved); +} + + +static inline bool mi_page_is_full(mi_page_t* page) { + const bool full = (page->reserved == page->used); + mi_assert_internal(!full || page->free == NULL); + return full; +} + +// is more than 7/8th of a page in use? +static inline bool mi_page_is_mostly_used(const mi_page_t* page) { + if (page==NULL) return true; + uint16_t frac = page->reserved / 8U; + return (page->reserved - page->used <= frac); +} + +// is more than (n-1)/n'th of a page in use? 
+static inline bool mi_page_is_used_at_frac(const mi_page_t* page, uint16_t n) { + if (page==NULL) return true; + uint16_t frac = page->reserved / n; + return (page->reserved - page->used <= frac); +} + + +static inline bool mi_page_is_huge(const mi_page_t* page) { + return (mi_page_is_singleton(page) && + (page->block_size > MI_LARGE_MAX_OBJ_SIZE || + (mi_memkind_is_os(page->memid.memkind) && page->memid.mem.os.base < (void*)page))); +} + +static inline mi_page_queue_t* mi_page_queue(const mi_theap_t* theap, size_t size) { + mi_page_queue_t* const pq = &((mi_theap_t*)theap)->pages[_mi_bin(size)]; + if (size <= MI_LARGE_MAX_OBJ_SIZE) { mi_assert_internal(pq->block_size <= MI_LARGE_MAX_OBJ_SIZE); } + return pq; +} + + +//----------------------------------------------------------- +// Page thread id and flags +//----------------------------------------------------------- + +// Thread id of thread that owns this page (with flags in the bottom 2 bits) +static inline mi_threadid_t mi_page_xthread_id(const mi_page_t* page) { + return mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_id); +} + +// Plain thread id of the thread that owns this page +static inline mi_threadid_t mi_page_thread_id(const mi_page_t* page) { + return (mi_page_xthread_id(page) & ~MI_PAGE_FLAG_MASK); +} + +static inline mi_page_flags_t mi_page_flags(const mi_page_t* page) { + return (mi_page_xthread_id(page) & MI_PAGE_FLAG_MASK); +} + +static inline void mi_page_flags_set(mi_page_t* page, bool set, mi_page_flags_t newflag) { + if (set) { mi_atomic_or_relaxed(&page->xthread_id, newflag); } + else { mi_atomic_and_relaxed(&page->xthread_id, ~newflag); } +} + +static inline bool mi_page_is_in_full(const mi_page_t* page) { + return ((mi_page_flags(page) & MI_PAGE_IN_FULL_QUEUE) != 0); +} + +static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) { + mi_page_flags_set(page, in_full, MI_PAGE_IN_FULL_QUEUE); +} + +static inline bool mi_page_has_interior_pointers(const mi_page_t* page) { + return ((mi_page_flags(page) & MI_PAGE_HAS_INTERIOR_POINTERS) != 0); +} + +static inline void mi_page_set_has_interior_pointers(mi_page_t* page, bool has_aligned) { + mi_page_flags_set(page, has_aligned, MI_PAGE_HAS_INTERIOR_POINTERS); +} + +static inline void mi_page_set_theap(mi_page_t* page, mi_theap_t* theap) { + // mi_assert_internal(!mi_page_is_in_full(page)); // can happen when destroying pages on theap_destroy + page->theap = theap; + const mi_threadid_t tid = (theap == NULL ? MI_THREADID_ABANDONED : theap->tld->thread_id); + mi_assert_internal((tid & MI_PAGE_FLAG_MASK) == 0); + + // we need to use an atomic cas since a concurrent thread may still set the MI_PAGE_HAS_INTERIOR_POINTERS flag (see `alloc_aligned.c`). 
+ mi_threadid_t xtid_old = mi_page_xthread_id(page); + mi_threadid_t xtid; + do { + xtid = tid | (xtid_old & MI_PAGE_FLAG_MASK); + } while (!mi_atomic_cas_weak_release(&page->xthread_id, &xtid_old, xtid)); +} + +static inline bool mi_page_is_abandoned(const mi_page_t* page) { + // note: the xtheap field of an abandoned theap is set to the subproc (for fast reclaim-on-free) + return (mi_page_thread_id(page) <= MI_THREADID_ABANDONED_MAPPED); +} + +static inline bool mi_page_is_abandoned_mapped(const mi_page_t* page) { + return (mi_page_thread_id(page) == MI_THREADID_ABANDONED_MAPPED); +} + +static inline void mi_page_set_abandoned_mapped(mi_page_t* page) { + mi_assert_internal(mi_page_is_abandoned(page)); + mi_atomic_or_relaxed(&page->xthread_id, MI_THREADID_ABANDONED_MAPPED); +} + +static inline void mi_page_clear_abandoned_mapped(mi_page_t* page) { + mi_assert_internal(mi_page_is_abandoned_mapped(page)); + mi_atomic_and_relaxed(&page->xthread_id, MI_PAGE_FLAG_MASK); +} + + +static inline mi_theap_t* mi_page_theap(const mi_page_t* page) { + mi_assert_internal(!mi_page_is_abandoned(page)); + mi_assert_internal(page->theap != NULL); + return page->theap; +} + +static inline mi_tld_t* mi_page_tld(const mi_page_t* page) { + mi_assert_internal(!mi_page_is_abandoned(page)); + mi_assert_internal(page->theap != NULL); + return page->theap->tld; +} + + +static inline mi_heap_t* mi_page_heap(const mi_page_t* page) { + mi_heap_t* heap = page->heap; + // we use NULL for the main heap to make `_mi_page_get_associated_theap` fast in `free.c:mi_abandoned_page_try_reclaim`. + if mi_likely(heap==NULL) heap = mi_heap_main(); + mi_assert_internal(heap != NULL); + return heap; +} + +//----------------------------------------------------------- +// Thread free list and ownership +//----------------------------------------------------------- + +// Thread free flag helpers +static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) { + return (mi_block_t*)(tf & ~1); +} +static inline bool mi_tf_is_owned(mi_thread_free_t tf) { + return ((tf & 1) == 1); +} +static inline mi_thread_free_t mi_tf_create(mi_block_t* block, bool owned) { + return (mi_thread_free_t)((uintptr_t)block | (owned ? 1 : 0)); +} + +// Thread free access +static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { + return mi_tf_block(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free)); +} + +// are there any available blocks? +static inline bool mi_page_has_any_available(const mi_page_t* page) { + mi_assert_internal(page != NULL && page->reserved > 0); + return (page->used < page->reserved || (mi_page_thread_free(page) != NULL)); +} + +// Owned? +static inline bool mi_page_is_owned(const mi_page_t* page) { + return mi_tf_is_owned(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free)); +} + +// get ownership; returns true if the page was not owned before. 
+static inline bool mi_page_claim_ownership(mi_page_t* page) { + const uintptr_t old = mi_atomic_or_acq_rel(&page->xthread_free, 1); + return ((old&1)==0); +} + + +/* ------------------------------------------------------------------- + Guarded objects +------------------------------------------------------------------- */ +#if MI_GUARDED + +// we always align guarded pointers in a block at an offset +// the block `next` field is then used as a tag to distinguish regular offset aligned blocks from guarded ones +#define MI_BLOCK_TAG_ALIGNED ((mi_encoded_t)(0)) +#define MI_BLOCK_TAG_GUARDED (~MI_BLOCK_TAG_ALIGNED) + +static inline bool mi_block_ptr_is_guarded(const mi_block_t* block, const void* p) { + const ptrdiff_t offset = (uint8_t*)p - (uint8_t*)block; + return (offset >= (ptrdiff_t)(sizeof(mi_block_t)) && block->next == MI_BLOCK_TAG_GUARDED); +} + +static inline bool mi_theap_malloc_use_guarded(mi_theap_t* theap, size_t size) { + // this code is written to result in fast assembly as it is on the hot path for allocation + const size_t count = theap->guarded_sample_count - 1; // if the rate was 0, this will underflow and count for a long time.. + if mi_likely(count != 0) { + // no sample + theap->guarded_sample_count = count; + return false; + } + else if (size >= theap->guarded_size_min && size <= theap->guarded_size_max) { + // use guarded allocation + theap->guarded_sample_count = theap->guarded_sample_rate; // reset + return (theap->guarded_sample_rate != 0); + } + else { + // failed size criteria, rewind count (but don't write to an empty theap) + if (theap->guarded_sample_rate != 0) { theap->guarded_sample_count = 1; } + return false; + } +} + +mi_decl_restrict void* _mi_theap_malloc_guarded(mi_theap_t* theap, size_t size, bool zero) mi_attr_noexcept; + +#endif + + +/* ------------------------------------------------------------------- +Encoding/Decoding the free list next pointers + +This is to protect against buffer overflow exploits where the +free list is mutated. Many hardened allocators xor the next pointer `p` +with a secret key `k1`, as `p^k1`. This prevents overwriting with known +values but might be still too weak: if the attacker can guess +the pointer `p` this can reveal `k1` (since `p^k1^p == k1`). +Moreover, if multiple blocks can be read as well, the attacker can +xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot +about the pointers (and subsequently `k1`). + +Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<next, keys); + #else + MI_UNUSED(keys); MI_UNUSED(null); + next = (mi_block_t*)block->next; + #endif + mi_track_mem_noaccess(block,sizeof(mi_block_t)); + return next; +} + +static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) { + mi_track_mem_undefined(block,sizeof(mi_block_t)); + #ifdef MI_ENCODE_FREELIST + block->next = mi_ptr_encode(null, next, keys); + #else + MI_UNUSED(keys); MI_UNUSED(null); + block->next = (mi_encoded_t)next; + #endif + mi_track_mem_noaccess(block,sizeof(mi_block_t)); +} + +static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) { + #ifdef MI_ENCODE_FREELIST + mi_block_t* next = mi_block_nextx(page,block,page->keys); + // check for free list corruption: is `next` at least in the same page? + // TODO: check if `next` is `page->block_size` aligned? 
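+  // Illustrative sketch of the encoding described above (hedged; uses the k1/k2 names from that
+  // comment and the `mi_ptr_encode` helper visible below -- the exact key layout may differ):
+  //
+  //   encode(p) = rotl(p ^ k2, k1) + k1      // the `((p^k2) <<< k1) + k1` scheme
+  //   decode(x) = rotr(x - k1, k1) ^ k2      // its inverse
+  //
+  // A free-list entry overwritten by a buffer overflow therefore decodes to an essentially
+  // random address, which the same-page check below is likely to reject.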
+ if mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next)) { + _mi_error_message(EFAULT, "corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next); + next = NULL; + } + return next; + #else + MI_UNUSED(page); + return mi_block_nextx(page,block,NULL); + #endif +} + +static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) { + #ifdef MI_ENCODE_FREELIST + mi_block_set_nextx(page,block,next, page->keys); + #else + MI_UNUSED(page); + mi_block_set_nextx(page,block,next,NULL); + #endif +} + +/* ----------------------------------------------------------- + arena blocks +----------------------------------------------------------- */ + +// Blocks needed for a given byte size +static inline size_t mi_slice_count_of_size(size_t size) { + return _mi_divide_up(size, MI_ARENA_SLICE_SIZE); +} + +// Byte size of a number of blocks +static inline size_t mi_size_of_slices(size_t bcount) { + return (bcount * MI_ARENA_SLICE_SIZE); +} + + +/* ----------------------------------------------------------- + memory id's +----------------------------------------------------------- */ + +static inline mi_memid_t _mi_memid_create(mi_memkind_t memkind) { + mi_memid_t memid; + _mi_memzero_var(memid); + memid.memkind = memkind; + return memid; +} + +static inline mi_memid_t _mi_memid_none(void) { + return _mi_memid_create(MI_MEM_NONE); +} + +static inline mi_memid_t _mi_memid_create_os(void* base, size_t size, bool committed, bool is_zero, bool is_large) { + mi_memid_t memid = _mi_memid_create(MI_MEM_OS); + memid.mem.os.base = base; + memid.mem.os.size = size; + memid.initially_committed = committed; + memid.initially_zero = is_zero; + memid.is_pinned = is_large; + return memid; +} + +static inline mi_memid_t _mi_memid_create_meta(void* mpage, size_t block_idx, size_t block_count) { + mi_memid_t memid = _mi_memid_create(MI_MEM_META); + memid.mem.meta.meta_page = mpage; + memid.mem.meta.block_index = (uint32_t)block_idx; + memid.mem.meta.block_count = (uint32_t)block_count; + memid.initially_committed = true; + memid.initially_zero = true; + memid.is_pinned = true; + return memid; +} + + +// ------------------------------------------------------------------- +// Fast "random" shuffle +// ------------------------------------------------------------------- + +static inline uintptr_t _mi_random_shuffle(uintptr_t x) { + if (x==0) { x = 17; } // ensure we don't get stuck in generating zeros +#if (MI_INTPTR_SIZE>=8) + // by Sebastiano Vigna, see: + x ^= x >> 30; + x *= 0xbf58476d1ce4e5b9UL; + x ^= x >> 27; + x *= 0x94d049bb133111ebUL; + x ^= x >> 31; +#elif (MI_INTPTR_SIZE==4) + // by Chris Wellons, see: + x ^= x >> 16; + x *= 0x7feb352dUL; + x ^= x >> 15; + x *= 0x846ca68bUL; + x ^= x >> 16; +#endif + return x; +} + + +// --------------------------------------------------------------------------------- +// Provide our own `_mi_memcpy` for potential performance optimizations. +// +// For now, only on x64/x86 we optimize to `rep movsb/stosb`. +// Generally, we check for "fast short rep movsb" (FSRM) or "fast enhanced rep movsb" (ERMS) support +// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253. 
+// --------------------------------------------------------------------------------- + +#if !MI_TRACK_ENABLED && defined(_WIN32) && (MI_ARCH_X64 || MI_ARCH_X86) + +extern mi_decl_hidden bool _mi_cpu_has_fsrm; +extern mi_decl_hidden bool _mi_cpu_has_erms; + +static inline void _mi_memcpy(void* dst, const void* src, size_t n) { + if ((_mi_cpu_has_fsrm && n <= 128) || (_mi_cpu_has_erms && n > 128)) { + __movsb((unsigned char*)dst, (const unsigned char*)src, n); + } + else { + memcpy(dst, src, n); + } +} + +static inline void _mi_memset(void* dst, int val, size_t n) { + if ((_mi_cpu_has_fsrm && n <= 128) || (_mi_cpu_has_erms && n > 128)) { + __stosb((unsigned char*)dst, (uint8_t)val, n); + } + else { + memset(dst, val, n); + } +} + +static inline void _mi_memset_small(void* dst, int val, size_t n) { + mi_assert_internal(n<=2*MI_SMALL_SIZE_MAX); + __stosb((unsigned char*)dst, (uint8_t)val, n); +} + +#elif !MI_TRACK_ENABLED && defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86) + +extern mi_decl_hidden bool _mi_cpu_has_fsrm; +extern mi_decl_hidden bool _mi_cpu_has_erms; + +static inline void _mi_movsb(void* dst, const void* src, size_t n) { + __asm volatile("rep movsb" : "+D"(dst), "+c"(n), "+S"(src) : : "memory"); +} + +static inline void _mi_stosb(void* dst, uint8_t val, size_t n) { + __asm volatile("rep stosb" : "+D"(dst), "+c"(n) : "a"(val) : "memory"); +} + +static inline void _mi_memcpy(void* dst, const void* src, size_t n) { + if ((_mi_cpu_has_fsrm && n <= 128) || (_mi_cpu_has_erms && n > 128)) { + _mi_movsb(dst,src,n); + } + else { + memcpy(dst,src,n); + } +} + +static inline void _mi_memset(void* dst, int val, size_t n) { + if ((_mi_cpu_has_fsrm && n <= 128) || (_mi_cpu_has_erms && n > 128)) { + _mi_stosb(dst, (uint8_t)val, n); + } + else { + memset(dst,val,n); + } +} + +static inline void _mi_memset_small(void* dst, int val, size_t n) { + _mi_stosb(dst,val,n); +} + +#else + +static inline void _mi_memcpy(void* dst, const void* src, size_t n) { + memcpy(dst, src, n); +} + +static inline void _mi_memset(void* dst, int val, size_t n) { + memset(dst, val, n); +} + +static inline void _mi_memset_small(void* dst, int val, size_t n) { + mi_assert_internal(n<=2*MI_SMALL_SIZE_MAX); + memset(dst, val, n); +} + +#endif + +// ------------------------------------------------------------------------------- +// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned +// This is used for example in `mi_realloc`. +// ------------------------------------------------------------------------------- + +#if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) + +// On GCC/CLang we provide a hint that the pointers are word aligned. 
+static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { + mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); + void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); + const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE); + _mi_memcpy(adst, asrc, n); +} + +static inline void _mi_memset_aligned(void* dst, int val, size_t n) { + mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0); + void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); + _mi_memset(adst, val, n); +} + +static inline void _mi_memset_aligned_small(void* dst, int val, size_t n) { + mi_assert_internal(n<=2*MI_SMALL_SIZE_MAX); + void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); + _mi_memset_small(adst, val, n); +} + +#else + +// Default fallback on `_mi_memcpy` +static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { + mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); + _mi_memcpy(dst, src, n); +} + +static inline void _mi_memset_aligned(void* dst, int val, size_t n) { + mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0); + _mi_memset(dst, val, n); +} + +static inline void _mi_memset_aligned_small(void* dst, int val, size_t n) { + mi_assert_internal(n<=2*MI_SMALL_SIZE_MAX); + mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0); + _mi_memset_small(dst, val, n); +} + +#endif + +static inline void _mi_memzero(void* dst, size_t n) { + _mi_memset(dst, 0, n); +} + +static inline void _mi_memzero_small(void* dst, size_t n) { + mi_assert_internal(n<=2*MI_SMALL_SIZE_MAX); + _mi_memset_small(dst, 0, n); +} + +static inline void _mi_memzero_aligned(void* dst, size_t n) { + _mi_memset_aligned(dst, 0, n); +} + +static inline void _mi_memzero_aligned_small(void* dst, size_t n) { + mi_assert_internal(n<=2*MI_SMALL_SIZE_MAX); + _mi_memset_aligned_small(dst, 0, n); +} + + +#endif // MI_INTERNAL_H diff --git a/3rd/mimalloc-3.2.6/include/mimalloc/prim.h b/3rd/mimalloc-3.2.6/include/mimalloc/prim.h new file mode 100644 index 00000000..0dc27327 --- /dev/null +++ b/3rd/mimalloc-3.2.6/include/mimalloc/prim.h @@ -0,0 +1,501 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_PRIM_H +#define MIMALLOC_PRIM_H +#include "internal.h" // mi_decl_hidden + +// -------------------------------------------------------------------------- +// This file specifies the primitive portability API. +// Each OS/host needs to implement these primitives, see `src/prim` +// for implementations on Window, macOS, WASI, and Linux/Unix. 
+// +// note: on all primitive functions, we always have result parameters != NULL, and: +// addr != NULL and page aligned +// size > 0 and page aligned +// the return value is an error code as an `int` where 0 is success +// -------------------------------------------------------------------------- + +// OS memory configuration +typedef struct mi_os_mem_config_s { + size_t page_size; // default to 4KiB + size_t large_page_size; // 0 if not supported, usually 2MiB (4MiB on Windows) + size_t alloc_granularity; // smallest allocation size (usually 4KiB, on Windows 64KiB) + size_t physical_memory_in_kib; // physical memory size in KiB + size_t virtual_address_bits; // usually 48 or 56 bits on 64-bit systems. (used to determine secure randomization) + bool has_overcommit; // can we reserve more memory than can be actually committed? + bool has_partial_free; // can allocated blocks be freed partially? (true for mmap, false for VirtualAlloc) + bool has_virtual_reserve; // supports virtual address space reservation? (if true we can reserve virtual address space without using commit or physical memory) + bool has_transparent_huge_pages; // true if transparent huge pages are enabled (on Linux) +} mi_os_mem_config_t; + +// Initialize +void _mi_prim_mem_init( mi_os_mem_config_t* config ); + +// Free OS memory +int _mi_prim_free(void* addr, size_t size ); + +// Allocate OS memory. Return NULL on error. +// The `try_alignment` is just a hint and the returned pointer does not have to be aligned. +// If `commit` is false, the virtual memory range only needs to be reserved (with no access) +// which will later be committed explicitly using `_mi_prim_commit`. +// `is_zero` is set to true if the memory was zero initialized (as on most OS's) +// The `hint_addr` address is either `NULL` or a preferred allocation address but can be ignored. +// pre: !commit => !allow_large +// try_alignment >= _mi_os_page_size() and a power of 2 +int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr); + +// Commit memory. Returns error code or 0 on success. +// For example, on Linux this would make the memory PROT_READ|PROT_WRITE. +// `is_zero` is set to true if the memory was zero initialized (e.g. on Windows) +int _mi_prim_commit(void* addr, size_t size, bool* is_zero); + +// Decommit memory. Returns error code or 0 on success. The `needs_recommit` result is true +// if the memory would need to be re-committed. For example, on Windows this is always true, +// but on Linux we could use MADV_DONTNEED to decommit which does not need a recommit. +// pre: needs_recommit != NULL +int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit); + +// Reset memory. The range keeps being accessible but the content might be reset to zero at any moment. +// Returns error code or 0 on success. +int _mi_prim_reset(void* addr, size_t size); + +// Reuse memory. This is called for memory that is already committed but +// may have been reset (`_mi_prim_reset`) or decommitted (`_mi_prim_decommit`) where `needs_recommit` was false. +// Returns error code or 0 on success. On most platforms this is a no-op. +int _mi_prim_reuse(void* addr, size_t size); + +// Protect memory. Returns error code or 0 on success. +int _mi_prim_protect(void* addr, size_t size, bool protect); + +// Allocate huge (1GiB) pages possibly associated with a NUMA node. 
+// `is_zero` is set to true if the memory was zero initialized (as on most OS's) +// pre: size > 0 and a multiple of 1GiB. +// numa_node is either negative (don't care), or a numa node number. +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr); + +// Return the current NUMA node +size_t _mi_prim_numa_node(void); + +// Return the number of logical NUMA nodes +size_t _mi_prim_numa_node_count(void); + +// Clock ticks +mi_msecs_t _mi_prim_clock_now(void); + +// Return process information (only for statistics) +typedef struct mi_process_info_s { + mi_msecs_t elapsed; + mi_msecs_t utime; + mi_msecs_t stime; + size_t current_rss; + size_t peak_rss; + size_t current_commit; + size_t peak_commit; + size_t page_faults; +} mi_process_info_t; + +void _mi_prim_process_info(mi_process_info_t* pinfo); + +// Default stderr output. (only for warnings etc. with verbose enabled) +// msg != NULL && _mi_strlen(msg) > 0 +void _mi_prim_out_stderr( const char* msg ); + +// Get an environment variable. (only for options) +// name != NULL, result != NULL, result_size >= 64 +bool _mi_prim_getenv(const char* name, char* result, size_t result_size); + + +// Fill a buffer with strong randomness; return `false` on error or if +// there is no strong randomization available. +bool _mi_prim_random_buf(void* buf, size_t buf_len); + +// Called on the first thread start, and should ensure `_mi_thread_done` is called on thread termination. +void _mi_prim_thread_init_auto_done(void); + +// Called on process exit and may take action to clean up resources associated with the thread auto done. +void _mi_prim_thread_done_auto_done(void); + +// Called when the default theap for a thread changes +void _mi_prim_thread_associate_default_theap(mi_theap_t* theap); + +// Is this thread part of a thread pool? +bool _mi_prim_thread_is_in_threadpool(void); + + +//------------------------------------------------------------------- +// Access to TLS (thread local storage) slots. +// We need fast access to both a unique thread id (in `free.c:mi_free`) and +// to a thread-local theap pointer (in `alloc.c:mi_malloc`). +// To achieve this we use specialized code for various platforms. +//------------------------------------------------------------------- + +// On some libc + platform combinations we can directly access a thread-local storage (TLS) slot. +// The TLS layout depends on both the OS and libc implementation so we use specific tests for each main platform. +// If you test on another platform and it works please send a PR :-) +// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register. +// +// Note: we would like to prefer `__builtin_thread_pointer()` nowadays instead of using assembly, +// but unfortunately we can not detect support reliably (see issue #883) +// We also use it on Apple OS as we use a TLS slot for the default theap there. 
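+// Illustrative sketch (hedged; assumes x86_64 Linux/glibc, matching the inline assembly below):
+// a TLS slot read is a single offset load from the thread register, e.g.
+//
+//   void* v = mi_prim_tls_slot(slot);   // roughly: movq %fs:(slot*8), %rax
+//
+// which is what makes the thread-id and default-theap lookups cheap enough for the
+// allocation and free fast paths.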
+#if (defined(_WIN32)) || \ + (defined(__GNUC__) && ( \ + (defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || (defined(__arm__) && __ARM_ARCH >= 7) || defined(__aarch64__))) \ + || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__) || defined(__POWERPC__))) \ + || (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || (defined(__arm__) && __ARM_ARCH >= 7) || defined(__aarch64__))) \ + || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ + || (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ + )) + +static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { + void* res; + const size_t ofs = (slot*sizeof(void*)); + #if defined(_WIN32) + #if (_M_X64 || _M_AMD64) && !defined(_M_ARM64EC) + res = (void*)__readgsqword((unsigned long)ofs); // direct load at offset from gs + #elif _M_IX86 && !defined(_M_ARM64EC) + res = (void*)__readfsdword((unsigned long)ofs); // direct load at offset from fs + #else + res = ((void**)NtCurrentTeb())[slot]; MI_UNUSED(ofs); + #endif + #elif defined(__i386__) + __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86 32-bit always uses GS + #elif defined(__APPLE__) && defined(__x86_64__) + __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS + #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) + __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x32 ABI + #elif defined(__x86_64__) + __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS + #elif defined(__arm__) + void** tcb; MI_UNUSED(ofs); + __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); + res = tcb[slot]; + #elif defined(__aarch64__) + void** tcb; MI_UNUSED(ofs); + #if defined(__APPLE__) // M1, issue #343 + __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); + #else + __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); + #endif + res = tcb[slot]; + #elif defined(__APPLE__) && defined(__POWERPC__) // ppc, issue #781 + MI_UNUSED(ofs); + res = pthread_getspecific(slot); + #else + #define MI_HAS_TLS_SLOT 0 + MI_UNUSED(ofs); + res = NULL; + #endif + return res; +} + +#ifndef MI_HAS_TLS_SLOT +#define MI_HAS_TLS_SLOT 1 +#endif + +// setting a tls slot is only used on macOS for now +static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { + const size_t ofs = (slot*sizeof(void*)); + #if defined(_WIN32) + ((void**)NtCurrentTeb())[slot] = value; MI_UNUSED(ofs); + #elif defined(__i386__) + __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS + #elif defined(__APPLE__) && defined(__x86_64__) + __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 macOS uses GS + #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) + __asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x32 ABI + #elif defined(__x86_64__) + __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS + #elif defined(__arm__) + void** tcb; MI_UNUSED(ofs); + __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); + tcb[slot] = value; + #elif defined(__aarch64__) + void** tcb; MI_UNUSED(ofs); + #if defined(__APPLE__) // M1, issue #343 + __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); + #else + __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); + #endif + 
tcb[slot] = value; + #elif defined(__APPLE__) && defined(__POWERPC__) // ppc, issue #781 + MI_UNUSED(ofs); + pthread_setspecific(slot, value); + #else + MI_UNUSED(ofs); MI_UNUSED(value); + #endif +} + +#endif + + +// defined in `init.c`; do not use these directly +extern mi_decl_hidden mi_decl_thread mi_theap_t* __mi_theap_main; // theap belonging to the main heap +extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called? + + +//------------------------------------------------------------------- +// Get a fast unique thread id. +// +// Getting the thread id should be performant as it is called in the +// fast path of `_mi_free` and we specialize for various platforms as +// inlined definitions. Regular code should call `init.c:_mi_thread_id()`. +// We only require _mi_prim_thread_id() to return a unique id +// for each thread (unequal to zero). +//------------------------------------------------------------------- + + +// Do we have __builtin_thread_pointer? This would be the preferred way to get a unique thread id +// but unfortunately, it seems we cannot test for this reliably at this time (see issue #883) +// Nevertheless, it seems needed on older graviton platforms (see issue #851). +// For now, we only enable this for specific platforms. +#if !defined(__APPLE__) /* on apple (M1) the wrong register is read (tpidr_el0 instead of tpidrro_el0) so fall back to TLS slot assembly ()*/ \ + && !defined(__CYGWIN__) \ + && !defined(MI_LIBC_MUSL) \ + && (!defined(__clang_major__) || __clang_major__ >= 14) /* older clang versions emit bad code; fall back to using the TLS slot () */ + #if (defined(__GNUC__) && (__GNUC__ >= 7) && defined(__aarch64__)) /* aarch64 for older gcc versions (issue #851) */ \ + || (defined(__GNUC__) && (__GNUC__ >= 11) && defined(__x86_64__)) \ + || (defined(__clang_major__) && (__clang_major__ >= 14) && (defined(__aarch64__) || defined(__x86_64__))) + #define MI_USE_BUILTIN_THREAD_POINTER 1 + #endif +#endif + +static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept; + +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + const mi_threadid_t tid = __mi_prim_thread_id(); + mi_assert_internal(tid > 1); + mi_assert_internal((tid & MI_PAGE_FLAG_MASK) == 0); // bottom 2 bits are clear? + return tid; +} + +// Get a unique id for the current thread. 
+#if defined(MI_PRIM_THREAD_ID) + +static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept { + return MI_PRIM_THREAD_ID(); // used for example by CPython for a free threaded build (see python/cpython#115488) +} + +#elif defined(_WIN32) + +static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept { + // Windows: works on Intel and ARM in both 32- and 64-bit + return (uintptr_t)NtCurrentTeb(); +} + +#elif MI_USE_BUILTIN_THREAD_POINTER + +static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept { + // Works on most Unix based platforms with recent compilers + return (uintptr_t)__builtin_thread_pointer(); +} + +#elif MI_HAS_TLS_SLOT + +static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept { + #if defined(__BIONIC__) + // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id + // see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86 + return (uintptr_t)mi_prim_tls_slot(1); + #else + // in all our other targets, slot 0 is the thread id + // glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h + // apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36 + return (uintptr_t)mi_prim_tls_slot(0); + #endif +} + +#else + +// otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms). +static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept { + return (uintptr_t)&__mi_theap_main; +} + +#endif + + + +/* ---------------------------------------------------------------------------------------- +Get the thread local default theap: `_mi_theap_default()` (and the cached heap `_mi_theap_cached`). + +This is inlined here as it is on the fast path for allocation functions. +We have 4 models: + +- MI_TLS_MODEL_THREAD_LOCAL: use regular thread local (Linux) + On most platforms (Linux, FreeBSD, NetBSD, etc), this just returns a + __thread local variable (`__mi_theap_default`). With the initial-exec TLS model this ensures + that the storage will always be available and properly initialized (with an empty theap). + +- MI_TLS_MODEL_FIXED_SLOT: use a fixed slot in the TLS (macOS) + On some platforms the underlying TLS implementation (or the loader) will call itself `malloc` + on a first access to a thread local and recurse in the MI_TLS_MODEL_THREAD_LOCAL. + We can get around this by reserving an unused and fixed TLS slot. + +- MI_TLS_MODEL_DYNAMIC_WIN32: use a dynamically allocated slot with TlsAlloc. (Windows) + Windows unfortunately has slow thread locals and this is more efficient. + +- MI_TLS_MODEL_DYNAMIC_PTHREADS: use pthread_getspecific + Last resort if thread-locals recurse. + Try to use MI_TLS_MODEL_THREAD_LOCAL with MI_TLS_RECURSE_GUARD defined instead. +------------------------------------------------------------------------------------------- */ + +static inline mi_theap_t* _mi_theap_default(void); +static inline mi_theap_t* _mi_theap_cached(void); + +#if defined(_WIN32) + #define MI_TLS_MODEL_DYNAMIC_WIN32 1 +#elif defined(__APPLE__) // macOS + // #define MI_TLS_MODEL_DYNAMIC_PTHREADS 1 // also works but a bit slower + #define MI_TLS_MODEL_FIXED_SLOT 1 + #define MI_TLS_MODEL_FIXED_SLOT_DEFAULT 108 // seems unused. @apple: it would be great to get 2 official slots for custom allocators.. :-) + #define MI_TLS_MODEL_FIXED_SLOT_CACHED 109 + // we used before __PTK_FRAMEWORK_OLDGC_KEY9 (89) but that seems used now. 
+ // see +#elif defined(__OpenBSD__) // || defined(__ANDROID__) + #define MI_TLS_MODEL_DYNAMIC_PTHREADS 1 + // #define MI_TLS_MODEL_DYNAMIC_PTHREADS_DEFAULT_ENTRY_IS_NULL 1 +#else + #define MI_TLS_MODEL_THREAD_LOCAL 1 +#endif + +// Declared this way to optimize register spills and branches +mi_decl_cold mi_decl_noinline mi_theap_t* _mi_theap_empty_get(void); + +static inline mi_theap_t* __mi_theap_empty(void) { + #if __GNUC__ + __asm(""); // prevent conditional load + return (mi_theap_t*)&_mi_theap_empty; + #else + return _mi_theap_empty_get(); + #endif +} + +#if MI_TLS_MODEL_THREAD_LOCAL +// Thread local with an initial value (Linux). Very efficient. + +extern mi_decl_hidden mi_decl_thread mi_theap_t* __mi_theap_default; // default theap to allocate from +extern mi_decl_hidden mi_decl_thread mi_theap_t* __mi_theap_cached; // theap from the last used heap + +static inline mi_theap_t* _mi_theap_default(void) { + #if defined(MI_TLS_RECURSE_GUARD) + if (mi_unlikely(!_mi_process_is_initialized)) return _mi_theap_empty_get(); + #endif + return __mi_theap_default; +} + +static inline mi_theap_t* _mi_theap_cached(void) { + return __mi_theap_cached; +} + +#elif MI_TLS_MODEL_FIXED_SLOT +// Fixed TLS slot (macOS). +#define MI_THEAP_INITASNULL 1 + +static inline mi_theap_t* _mi_theap_default(void) { + return (mi_theap_t*)mi_prim_tls_slot(MI_TLS_MODEL_FIXED_SLOT_DEFAULT); +} + +static inline mi_theap_t* _mi_theap_cached(void) { + return (mi_theap_t*)mi_prim_tls_slot(MI_TLS_MODEL_FIXED_SLOT_CACHED); +} + +#elif MI_TLS_MODEL_DYNAMIC_WIN32 +// Dynamic TLS slot (windows) +#define MI_THEAP_INITASNULL 1 + +extern mi_decl_hidden size_t _mi_theap_default_slot; +extern mi_decl_hidden size_t _mi_theap_cached_slot; + +static inline mi_theap_t* _mi_theap_default(void) { + return (mi_theap_t*)mi_prim_tls_slot(_mi_theap_default_slot); // valid initial "last user slot" so it returns NULL at first leading to slot initialization +} + +static inline mi_theap_t* _mi_theap_cached(void) { + return (mi_theap_t*)mi_prim_tls_slot(_mi_theap_cached_slot); +} + +#elif MI_TLS_MODEL_DYNAMIC_PTHREADS +// Dynamic pthread slot on less common platforms. This is not too bad (but not great either). +#define MI_THEAP_INITASNULL 1 + +extern mi_decl_hidden pthread_key_t _mi_theap_default_key; +extern mi_decl_hidden pthread_key_t _mi_theap_cached_key; + +static inline mi_theap_t* _mi_theap_default(void) { + #if !MI_TLS_MODEL_DYNAMIC_PTHREADS_DEFAULT_ENTRY_IS_NULL + // we can skip this check if using the initial key will return NULL from pthread_getspecific + if mi_unlikely(_mi_theap_default_key==0) { return NULL; } + #endif + return (mi_theap_t*)pthread_getspecific(_mi_theap_default_key); +} + +static inline mi_theap_t* _mi_theap_cached(void) { + #if !MI_TLS_MODEL_DYNAMIC_PTHREADS_DEFAULT_ENTRY_IS_NULL + // we can skip this check if using the initial key will return NULL from pthread_getspecific + if mi_unlikely(_mi_theap_cached_key==0) { return NULL; } + #endif + return (mi_theap_t*)pthread_getspecific(_mi_theap_cached_key); +} + +#endif + + +static inline mi_theap_t* _mi_theap_main(void) { + mi_theap_t* const theap = __mi_theap_main; + mi_assert_internal(mi_theap_is_initialized(theap)); + return theap; +} + + +// Get (and possible create) the theap belonging to a heap +// We cache the last accessed theap in `_mi_theap_cached` for better performance. 
+static inline mi_theap_t* _mi_heap_theap(const mi_heap_t* heap) { + mi_theap_t* theap = _mi_theap_cached(); + #if MI_THEAP_INITASNULL + if mi_likely(theap!=NULL && theap->heap==heap) return theap; + #else + if mi_likely(theap->heap==heap) return theap; + #endif + return _mi_heap_theap_get_or_init(heap); +} + +static inline mi_theap_t* _mi_heap_theap_peek(const mi_heap_t* heap) { + mi_theap_t* theap = _mi_theap_cached(); + #if MI_THEAP_INITASNULL + if mi_unlikely(theap==NULL || theap->heap!=heap) + #else + if mi_unlikely(theap->heap!=heap) + #endif + { + theap = _mi_heap_theap_get_peek(heap); // don't update the cache on a query (?) + } + mi_assert(theap==NULL || theap->heap==heap); + return theap; +} + +static inline mi_theap_t* _mi_page_associated_theap(mi_page_t* page) { + mi_heap_t* const heap = page->heap; + mi_theap_t* theap; + if mi_likely(heap==NULL) { theap = _mi_theap_main(); } + else { theap = _mi_heap_theap(heap); } + mi_assert_internal(theap!=NULL && _mi_thread_id()==theap->tld->thread_id); + return theap; +} + +// Find the associated theap or NULL if it does not exist (during shutdown) +// Should be fast as it is called in `free.c:mi_free_try_collect`. +static inline mi_theap_t* _mi_page_associated_theap_peek(mi_page_t* page) { + mi_heap_t* const heap = page->heap; + mi_theap_t* theap; + if mi_likely(heap==NULL) { theap = _mi_theap_main(); } + else { theap = _mi_heap_theap_peek(heap); } + mi_assert_internal(theap==NULL || _mi_thread_id()==theap->tld->thread_id); + return theap; +} + +#endif // MI_PRIM_H diff --git a/3rd/mimalloc-3.2.6/include/mimalloc/track.h b/3rd/mimalloc-3.2.6/include/mimalloc/track.h new file mode 100644 index 00000000..8f8b93f9 --- /dev/null +++ b/3rd/mimalloc-3.2.6/include/mimalloc/track.h @@ -0,0 +1,145 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MI_TRACK_H +#define MI_TRACK_H + +/* ------------------------------------------------------------------------------------------------------ +Track memory ranges with macros for tools like Valgrind address sanitizer, or other memory checkers. +These can be defined for tracking allocation: + + #define mi_track_malloc_size(p,reqsize,size,zero) + #define mi_track_free_size(p,_size) + +The macros are set up such that the size passed to `mi_track_free_size` +always matches the size of `mi_track_malloc_size`. (currently, `size == mi_usable_size(p)`). +The `reqsize` is what the user requested, and `size >= reqsize`. +The `size` is either byte precise (and `size==reqsize`) if `MI_PADDING` is enabled, +or otherwise it is the usable block size which may be larger than the original request. +Use `_mi_block_size_of(void* p)` to get the full block size that was allocated (including padding etc). +The `zero` parameter is `true` if the allocated block is zero initialized. + +Optional: + + #define mi_track_align(p,alignedp,offset,size) + #define mi_track_resize(p,oldsize,newsize) + #define mi_track_init() + +The `mi_track_align` is called right after a `mi_track_malloc` for aligned pointers in a block. +The corresponding `mi_track_free` still uses the block start pointer and original size (corresponding to the `mi_track_malloc`). 
+The `mi_track_resize` is currently unused but could be called on reallocations within a block. +`mi_track_init` is called at program start. + +The following macros are for tools like asan and valgrind to track whether memory is +defined, undefined, or not accessible at all: + + #define mi_track_mem_defined(p,size) + #define mi_track_mem_undefined(p,size) + #define mi_track_mem_noaccess(p,size) + +-------------------------------------------------------------------------------------------------------*/ + +#if MI_TRACK_VALGRIND +// valgrind tool + +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_HEAP_DESTROY 1 // track free of individual blocks on theap_destroy +#define MI_TRACK_TOOL "valgrind" + +#include +#include + +#define mi_track_malloc_size(p,reqsize,size,zero) VALGRIND_MALLOCLIKE_BLOCK(p,size,MI_PADDING_SIZE /*red zone*/,zero) +#define mi_track_free_size(p,_size) VALGRIND_FREELIKE_BLOCK(p,MI_PADDING_SIZE /*red zone*/) +#define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/) +#define mi_track_mem_defined(p,size) VALGRIND_MAKE_MEM_DEFINED(p,size) +#define mi_track_mem_undefined(p,size) VALGRIND_MAKE_MEM_UNDEFINED(p,size) +#define mi_track_mem_noaccess(p,size) VALGRIND_MAKE_MEM_NOACCESS(p,size) + +#elif MI_TRACK_ASAN +// address sanitizer + +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_HEAP_DESTROY 0 +#define MI_TRACK_TOOL "asan" + +#include + +#define mi_track_malloc_size(p,reqsize,size,zero) ASAN_UNPOISON_MEMORY_REGION(p,size) +#define mi_track_free_size(p,size) ASAN_POISON_MEMORY_REGION(p,size) +#define mi_track_mem_defined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size) +#define mi_track_mem_undefined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size) +#define mi_track_mem_noaccess(p,size) ASAN_POISON_MEMORY_REGION(p,size) + +#elif MI_TRACK_ETW +// windows event tracing + +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_HEAP_DESTROY 1 +#define MI_TRACK_TOOL "ETW" + +#include "../src/prim/windows/etw.h" + +#define mi_track_init() EventRegistermicrosoft_windows_mimalloc(); +#define mi_track_malloc_size(p,reqsize,size,zero) EventWriteETW_MI_ALLOC((UINT64)(p), size) +#define mi_track_free_size(p,size) EventWriteETW_MI_FREE((UINT64)(p), size) + +#else +// no tracking + +#define MI_TRACK_ENABLED 0 +#define MI_TRACK_HEAP_DESTROY 0 +#define MI_TRACK_TOOL "none" + +#define mi_track_malloc_size(p,reqsize,size,zero) +#define mi_track_free_size(p,_size) + +#endif + +// ------------------- +// Utility definitions + +#ifndef mi_track_resize +#define mi_track_resize(p,oldsize,newsize) mi_track_free_size(p,oldsize); mi_track_malloc(p,newsize,false) +#endif + +#ifndef mi_track_align +#define mi_track_align(p,alignedp,offset,size) mi_track_mem_noaccess(p,offset) +#endif + +#ifndef mi_track_init +#define mi_track_init() +#endif + +#ifndef mi_track_mem_defined +#define mi_track_mem_defined(p,size) +#endif + +#ifndef mi_track_mem_undefined +#define mi_track_mem_undefined(p,size) +#endif + +#ifndef mi_track_mem_noaccess +#define mi_track_mem_noaccess(p,size) +#endif + + +#if MI_PADDING +#define mi_track_malloc(p,reqsize,zero) \ + if ((p)!=NULL) { \ + mi_assert_internal(mi_usable_size(p)==(reqsize)); \ + mi_track_malloc_size(p,reqsize,reqsize,zero); \ + } +#else +#define mi_track_malloc(p,reqsize,zero) \ + if ((p)!=NULL) { \ + mi_assert_internal(mi_usable_size(p)>=(reqsize)); \ + mi_track_malloc_size(p,reqsize,mi_usable_size(p),zero); \ + } +#endif + +#endif // MI_TRACK_H diff --git a/3rd/mimalloc-3.2.6/include/mimalloc/types.h 
b/3rd/mimalloc-3.2.6/include/mimalloc/types.h new file mode 100644 index 00000000..13216ad3 --- /dev/null +++ b/3rd/mimalloc-3.2.6/include/mimalloc/types.h @@ -0,0 +1,705 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MI_TYPES_H +#define MI_TYPES_H + +// -------------------------------------------------------------------------- +// This file contains the main type definitions for mimalloc: +// mi_heap_t : all data for a heap; usually there is just one main default heap. +// mi_theap_t : a thread local heap belonging to a specific heap: +// maintains lists of thread-local heap pages that have free space. +// mi_page_t : a mimalloc page (usually 64KiB or 512KiB) from +// where objects of a single size are allocated. +// Note: we write "OS page" for OS memory pages while +// using plain "page" for mimalloc pages (`mi_page_t`). +// mi_arena_t : a large memory area where pages are allocated (process shared) +// mi_tld_t : thread local data +// mi_subproc_t : all heaps belong to a sub-process (usually just the main one) +// -------------------------------------------------------------------------- + + +#include +#include // ptrdiff_t +#include // uintptr_t, uint16_t, etc +#include // bool +#include // SIZE_MAX etc. +#include // error codes +#include "bits.h" // size defines (MI_INTPTR_SIZE etc), bit operations +#include "atomic.h" // _Atomic primitives + +// Minimal alignment necessary. On most platforms 16 bytes are needed +// due to SSE registers for example. This must be at least `sizeof(void*)` +#ifndef MI_MAX_ALIGN_SIZE +#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t) +#endif + + +// ------------------------------------------------------ +// Variants +// ------------------------------------------------------ + +// Define NDEBUG in the release version to disable assertions. +// #define NDEBUG + +// Define MI_TRACK_ to enable tracking support +// #define MI_TRACK_VALGRIND 1 +// #define MI_TRACK_ASAN 1 +// #define MI_TRACK_ETW 1 + +// Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance). +// #define MI_STAT 1 + +// Define MI_SECURE to enable security mitigations. Level 1 has minimal performance impact, +// but protects most metadata with guard pages: +// #define MI_SECURE 1 // guard page around metadata; check pointer validity on free +// +// Level 2 has more performance impact but protect well against various buffer overflows +// by surrounding all mimalloc pages with guard pages: +// #define MI_SECURE 2 // guard page around each mimalloc page (can fragment VMA's with large theaps..) +// +// The next two levels can have more performance cost: +// #define MI_SECURE 3 // randomize allocations, encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free) +// #define MI_SECURE 4 // checks for double free. (may be more expensive) + +#if !defined(MI_SECURE) +#define MI_SECURE 0 +#endif + +// Define MI_DEBUG for debug mode +// #define MI_DEBUG 1 // basic assertion checks and statistics, check double free, corrupted free list, and invalid pointer free. 
+// #define MI_DEBUG 2 // + internal assertion checks +// #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_DEBUG_FULL=ON) +#if !defined(MI_DEBUG) +#if defined(MI_BUILD_RELEASE) || defined(NDEBUG) +#define MI_DEBUG 0 +#else +#define MI_DEBUG 2 +#endif +#endif + +// Statistics (0=only essential, 1=normal, 2=more fine-grained (expensive) tracking) +#ifndef MI_STAT +#if (MI_DEBUG>0) +#define MI_STAT 2 +#else +#define MI_STAT 0 +#endif +#endif + +// Use guard pages behind objects of a certain size (set by the MIMALLOC_DEBUG_GUARDED_MIN/MAX options) +// Padding should be disabled when using guard pages +// #define MI_GUARDED 1 +#if MI_GUARDED +#define MI_PADDING 0 +#endif + +// Reserve extra padding at the end of each block to be more resilient against theap block overflows. +// The padding can detect buffer overflow on free. +#if !defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1 || (MI_TRACK_VALGRIND || MI_TRACK_ASAN || MI_TRACK_ETW)) +#define MI_PADDING 1 +#endif + +// Check padding bytes; allows byte-precise buffer overflow detection +#if !defined(MI_PADDING_CHECK) && MI_PADDING && (MI_SECURE>=3 || MI_DEBUG>=1) +#define MI_PADDING_CHECK 1 +#endif + + +// Encoded free lists allow detection of corrupted free lists +// and can detect buffer overflows, modify after free, and double `free`s. +#if (MI_SECURE>=3 || MI_DEBUG>=1) +#define MI_ENCODE_FREELIST 1 +#endif + +// Enable large pages for objects between 64KiB and 512KiB. +// This should perhaps be disabled by default as for many workloads the block sizes above 64 KiB +// are quite random which can lead to too many partially used large pages (but see issue #1104). +#ifndef MI_ENABLE_LARGE_PAGES +#define MI_ENABLE_LARGE_PAGES 1 +#endif + +// -------------------------------------------------------------- +// Sizes of internal data-structures +// (comments specify sizes on 64-bit, usually 32-bit is halved) +// -------------------------------------------------------------- + +// Main size parameter; determines max arena sizes and max arena object sizes etc. 
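The defaulting rules above can be hard to follow from prose alone. The snippet below is a standalone sketch that re-states the same preprocessor logic without including any mimalloc header, so the effective `MI_SECURE`/`MI_DEBUG`/`MI_STAT` levels can be printed for a given set of compile flags; in an actual build these are normally driven by CMake options such as `-DMI_SECURE=ON` or `-DMI_DEBUG_FULL=ON`, or by `NDEBUG` in release builds.

```
/* Standalone sketch: mirrors the defaulting rules above without including any
   mimalloc header. */
#include <stdio.h>

#ifndef MI_SECURE
#define MI_SECURE 0                       // no security mitigations unless requested
#endif

#ifndef MI_DEBUG
#if defined(MI_BUILD_RELEASE) || defined(NDEBUG)
#define MI_DEBUG 0
#else
#define MI_DEBUG 2                        // internal assertion checks in debug builds
#endif
#endif

#ifndef MI_STAT
#if (MI_DEBUG>0)
#define MI_STAT 2                         // detailed statistics when debugging
#else
#define MI_STAT 0
#endif
#endif

int main(void) {
  printf("MI_SECURE=%d MI_DEBUG=%d MI_STAT=%d\n", MI_SECURE, MI_DEBUG, MI_STAT);
  return 0;
}
```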
+#ifndef MI_ARENA_SLICE_SHIFT + #ifdef MI_SMALL_PAGE_SHIFT // backward compatibility + #define MI_ARENA_SLICE_SHIFT MI_SMALL_PAGE_SHIFT + #elif MI_SECURE && __APPLE__ && MI_ARCH_ARM64 + #define MI_ARENA_SLICE_SHIFT (17) // 128 KiB to not waste too much due to 16 KiB guard pages + #else + #define MI_ARENA_SLICE_SHIFT (13 + MI_SIZE_SHIFT) // 64 KiB (32 KiB on 32-bit) + #endif +#endif +#if MI_ARENA_SLICE_SHIFT < 12 +#error Arena slices should be at least 4KiB +#endif + +#ifndef MI_BCHUNK_BITS_SHIFT + #if MI_ARENA_SLICE_SHIFT <= 13 // <= 8KiB + #define MI_BCHUNK_BITS_SHIFT (7) // 128 bits + #elif MI_ARENA_SLICE_SHIFT < 16 // <= 32KiB + #define MI_BCHUNK_BITS_SHIFT (8) // 256 bits + #else + #define MI_BCHUNK_BITS_SHIFT (6 + MI_SIZE_SHIFT) // 512 bits (or 256 on 32-bit) + #endif +#endif + +#define MI_BCHUNK_BITS (1 << MI_BCHUNK_BITS_SHIFT) // sub-bitmaps in arena's are "bchunks" of 512 bits +#define MI_ARENA_SLICE_SIZE (MI_ZU(1) << MI_ARENA_SLICE_SHIFT) // arena's allocate in slices of 64 KiB +#define MI_ARENA_SLICE_ALIGN (MI_ARENA_SLICE_SIZE) + +#define MI_ARENA_MIN_OBJ_SLICES (1) +#define MI_ARENA_MAX_CHUNK_OBJ_SLICES (MI_BCHUNK_BITS) // 32 MiB (or 8 MiB on 32-bit) + +#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_MIN_OBJ_SLICES * MI_ARENA_SLICE_SIZE) +#define MI_ARENA_MAX_CHUNK_OBJ_SIZE (MI_ARENA_MAX_CHUNK_OBJ_SLICES * MI_ARENA_SLICE_SIZE) +#define MI_ARENA_MAX_OBJ_SIZE (MI_SIZE_BITS * MI_ARENA_MAX_CHUNK_OBJ_SIZE) // 2 GiB (or 256 MiB on 32-bit), larger than this is alloc'd by the OS + +#if MI_ARENA_MAX_CHUNK_OBJ_SIZE < MI_SIZE_SIZE*1024 +#error maximum object size may be too small to hold local thread data +#endif + +#define MI_SMALL_PAGE_SIZE MI_ARENA_MIN_OBJ_SIZE // 64 KiB +#define MI_MEDIUM_PAGE_SIZE (8*MI_SMALL_PAGE_SIZE) // 512 KiB (=byte in the bchunk bitmap) +#define MI_LARGE_PAGE_SIZE (MI_SIZE_SIZE*MI_MEDIUM_PAGE_SIZE) // 4 MiB (=word in the bchunk bitmap) + + +// Maximum number of size classes. (spaced exponentially in 12.5% increments) +#if MI_BIN_HUGE != 73U +#error "mimalloc internal: expecting 73 bins" +#endif +#define MI_BIN_FULL (MI_BIN_HUGE+1) +#define MI_BIN_COUNT (MI_BIN_FULL+1) + +// We never allocate more than PTRDIFF_MAX (see also ) +#define MI_MAX_ALLOC_SIZE PTRDIFF_MAX + +// Minimal commit for a page on-demand commit (should be >= OS page size) +#define MI_PAGE_MIN_COMMIT_SIZE MI_ARENA_SLICE_SIZE + + +// ------------------------------------------------------ +// Arena's are large reserved areas of memory allocated from +// the OS that are managed by mimalloc to efficiently +// allocate MI_ARENA_SLICE_SIZE slices of memory for the +// mimalloc pages. +// ------------------------------------------------------ + +// A large memory arena where pages are allocated in. +typedef struct mi_arena_s mi_arena_t; // defined below + + +// ------------------------------------------------------ +// Heaps contain allocated blocks. Heaps are self-contained +// but share the (sub-process) memory in the arena's. +// ------------------------------------------------------ + +// A first-class heap. +typedef struct mi_heap_s mi_heap_t; // heaps + +// ------------------------------------------------------ +// We can have sub-processes that are fully separated +// from each other (for running multiple Python interpreters +// for example). A sub-process holds the memory arenas and heaps. 
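For reference, the slice and page sizes defined above work out as follows on a 64-bit build. The sketch below re-derives them assuming `MI_SIZE_SHIFT == 3` (so `MI_SIZE_SIZE == 8`; both come from `bits.h` and are not shown in this hunk), and the numbers are illustrative only.

```
/* Illustrative re-derivation of the size parameters above for a 64-bit build;
   MI_SIZE_SHIFT==3 is assumed here (it is defined in bits.h, not in this hunk). */
#include <stdio.h>
#include <stddef.h>

#define MI_SIZE_SHIFT          3
#define MI_SIZE_SIZE           ((size_t)1 << MI_SIZE_SHIFT)         // 8 bytes per word
#define MI_ARENA_SLICE_SHIFT   (13 + MI_SIZE_SHIFT)                 // 16 -> 64 KiB slices
#define MI_ARENA_SLICE_SIZE    ((size_t)1 << MI_ARENA_SLICE_SHIFT)
#define MI_SMALL_PAGE_SIZE     MI_ARENA_SLICE_SIZE                  // 64 KiB
#define MI_MEDIUM_PAGE_SIZE    (8 * MI_SMALL_PAGE_SIZE)             // 512 KiB
#define MI_LARGE_PAGE_SIZE     (MI_SIZE_SIZE * MI_MEDIUM_PAGE_SIZE) // 4 MiB

int main(void) {
  printf("arena slice : %zu KiB\n", MI_ARENA_SLICE_SIZE / 1024);   // 64
  printf("small page  : %zu KiB\n", MI_SMALL_PAGE_SIZE / 1024);    // 64
  printf("medium page : %zu KiB\n", MI_MEDIUM_PAGE_SIZE / 1024);   // 512
  printf("large page  : %zu KiB\n", MI_LARGE_PAGE_SIZE / 1024);    // 4096
  return 0;
}
```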
+// ------------------------------------------------------ + +// A sub-process +typedef struct mi_subproc_s mi_subproc_t; + + +// --------------------------------------------------------------- +// a memory id tracks the provenance of arena/OS allocated memory +// --------------------------------------------------------------- + +// Memory can reside in arena's, direct OS allocated, meta-data pages, or statically allocated. +// The memid keeps track of this. +typedef enum mi_memkind_e { + MI_MEM_NONE, // not allocated + MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example) + MI_MEM_STATIC, // allocated in a static area and should not be freed (the initial main theap data for example (`init.c`)) + MI_MEM_META, // allocated with the meta data allocator (`arena-meta.c`) + MI_MEM_OS, // allocated from the OS + MI_MEM_OS_HUGE, // allocated as huge OS pages (usually 1GiB, pinned to physical memory) + MI_MEM_OS_REMAP, // allocated in a remapable area (i.e. using `mremap`) + MI_MEM_ARENA, // allocated from an arena (the usual case) (`arena.c`) + MI_MEM_HEAP_MAIN // allocated in the main heap (for theaps) +} mi_memkind_t; + +static inline bool mi_memkind_is_os(mi_memkind_t memkind) { + return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP); +} + +static inline bool mi_memkind_needs_no_free(mi_memkind_t memkind) { + return (memkind <= MI_MEM_STATIC); +} + + +typedef struct mi_memid_os_info { + void* base; // actual base address of the block (used for offset aligned allocations) + size_t size; // allocated full size + // size_t alignment; // alignment at allocation +} mi_memid_os_info_t; + +typedef struct mi_memid_arena_info { + mi_arena_t* arena; // arena that contains this memory + uint32_t slice_index; // slice index in the arena + uint32_t slice_count; // allocated slices +} mi_memid_arena_info_t; + +typedef struct mi_memid_meta_info { + void* meta_page; // meta-page that contains the block + uint32_t block_index; // block index in the meta-data page + uint32_t block_count; // allocated blocks +} mi_memid_meta_info_t; + +typedef struct mi_memid_s { + union { + mi_memid_os_info_t os; // only used for MI_MEM_OS + mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA + mi_memid_meta_info_t meta; // only used for MI_MEM_META + } mem; + mi_memkind_t memkind; + bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large (2Mib) or huge (1GiB) OS pages) + bool initially_committed;// `true` if the memory was originally allocated as committed + bool initially_zero; // `true` if the memory was originally zero initialized +} mi_memid_t; + + +static inline bool mi_memid_is_os(mi_memid_t memid) { + return mi_memkind_is_os(memid.memkind); +} + +static inline bool mi_memid_needs_no_free(mi_memid_t memid) { + return mi_memkind_needs_no_free(memid.memkind); +} + +static inline mi_arena_t* mi_memid_arena(mi_memid_t memid) { + return (memid.memkind == MI_MEM_ARENA ? memid.mem.arena.arena : NULL); +} + + +// ------------------------------------------------------ +// Mimalloc pages contain allocated blocks +// ------------------------------------------------------ + +// The free lists use encoded next fields +// (Only actually encodes when MI_ENCODED_FREELIST is defined.) 
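Note that `mi_memkind_is_os` and `mi_memkind_needs_no_free` above rely purely on the declaration order of `mi_memkind_t`. The following self-contained restatement (for illustration only, not a replacement for the real header) makes that ordering dependency explicit.

```
/* Self-contained restatement, for illustration only: the helpers classify a
   memory kind purely by its position in the enum declaration above. */
#include <stdbool.h>
#include <stdio.h>

typedef enum mi_memkind_e {
  MI_MEM_NONE, MI_MEM_EXTERNAL, MI_MEM_STATIC, MI_MEM_META,
  MI_MEM_OS, MI_MEM_OS_HUGE, MI_MEM_OS_REMAP, MI_MEM_ARENA, MI_MEM_HEAP_MAIN
} mi_memkind_t;

static bool mi_memkind_is_os(mi_memkind_t k)         { return (k >= MI_MEM_OS && k <= MI_MEM_OS_REMAP); }
static bool mi_memkind_needs_no_free(mi_memkind_t k) { return (k <= MI_MEM_STATIC); }

int main(void) {
  for (int k = MI_MEM_NONE; k <= MI_MEM_HEAP_MAIN; k++) {
    printf("kind %d: os=%d needs-no-free=%d\n", k,
           (int)mi_memkind_is_os((mi_memkind_t)k),
           (int)mi_memkind_needs_no_free((mi_memkind_t)k));
  }
  return 0;
}
```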
+typedef uintptr_t mi_encoded_t; + +// thread id's +typedef size_t mi_threadid_t; + +// free lists contain blocks +typedef struct mi_block_s { + mi_encoded_t next; +} mi_block_t; + + +// The page flags are put in the bottom 2 bits of the thread_id (for a fast test in `mi_free`) +// `has_interior_pointers` is true if the page has pointers at an offset in a block (so we have to unalign to the block start before free-ing) +// `in_full_queue` is true if the page is full and resides in the full queue (so we move it to a regular queue on free-ing) +#define MI_PAGE_IN_FULL_QUEUE MI_ZU(0x01) +#define MI_PAGE_HAS_INTERIOR_POINTERS MI_ZU(0x02) +#define MI_PAGE_FLAG_MASK MI_ZU(0x03) +typedef size_t mi_page_flags_t; + +// There are two special threadid's: 0 for pages that are abandoned (and not in a theap queue), +// and 4 for abandoned & mapped pages -- abandoned-mapped pages are abandoned but also mapped +// in an arena (in `mi_heap_t.arena_pages.pages_abandoned`) so these can be quickly found for reuse. +// Abandoning partially used pages allows for sharing of this memory between threads (in particular if threads are blocked) +#define MI_THREADID_ABANDONED MI_ZU(0) +#define MI_THREADID_ABANDONED_MAPPED (MI_PAGE_FLAG_MASK + 1) + +// Thread free list. +// Points to a list of blocks that are freed by other threads. +// The least-significant bit is set if the page is owned by the current thread. (`mi_page_is_owned`). +// Ownership is required before we can read any non-atomic fields in the page. +// This way we can push a block on the thread free list and try to claim ownership atomically in `free.c:mi_free_block_mt`. +typedef uintptr_t mi_thread_free_t; + +// A page contains blocks of one specific size (`block_size`). +// Each page has three lists of free blocks: +// `free` for blocks that can be allocated, +// `local_free` for freed blocks that are not yet available to `mi_malloc` +// `thread_free` for freed blocks by other threads +// The `local_free` and `thread_free` lists are migrated to the `free` list +// when it is exhausted. The separate `local_free` list is necessary to +// implement a monotonic heartbeat. The `thread_free` list is needed for +// avoiding atomic operations when allocating from the owning thread. +// +// `used - |thread_free|` == actual blocks that are in use (alive) +// `used - |thread_free| + |free| + |local_free| == capacity` +// +// We don't count "freed" (as |free|) but use only the `used` field to reduce +// the number of memory accesses in the `mi_page_all_free` function(s). +// Use `_mi_page_free_collect` to collect the thread_free list and update the `used` count. +// +// Notes: +// - Non-atomic fields can only be accessed if having _ownership_ (low bit of `xthread_free` is 1). +// Combining the `thread_free` list with an ownership bit allows a concurrent `free` to atomically +// free an object and (re)claim ownership if the page was abandoned. +// - If a page is not part of a theap it is called "abandoned" (`theap==NULL`) -- in +// that case the `xthreadid` is 0 or 4 (4 is for abandoned pages that +// are in the `pages_abandoned` lists of an arena, these are called "mapped" abandoned pages). +// - page flags are in the bottom 3 bits of `xthread_id` for the fast path in `mi_free`. +// - The layout is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc` +// - Using `uint16_t` does not seem to slow things down + +typedef struct mi_page_s { + _Atomic(mi_threadid_t) xthread_id; // thread this page belongs to.
(= `theap->thread_id (or 0 or 4 if abandoned) | page_flags`) + + mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) + uint16_t used; // number of blocks in use (including blocks in `thread_free`) + uint16_t capacity; // number of blocks committed + uint16_t reserved; // number of blocks reserved in memory + uint8_t retire_expire; // expiration count for retired blocks + bool free_is_zero; // `true` if the blocks in the free list are zero initialized + + mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) + _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads (= `mi_block_t* | (1 if owned)`) + + size_t block_size; // const: size available in each block (always `>0`) + uint8_t* page_start; // const: start of the blocks + + #if (MI_ENCODE_FREELIST || MI_PADDING) + uintptr_t keys[2]; // const: two random keys to encode the free lists (see `_mi_block_next`) or padding canary + #endif + + mi_theap_t* theap; // the theap owning this page (may not be valid or NULL for abandoned pages) + mi_heap_t* heap; // const: the heap owning this page + + struct mi_page_s* next; // next page owned by the theap with the same `block_size` + struct mi_page_s* prev; // previous page owned by the theap with the same `block_size` + size_t slice_committed; // committed size relative to the first arena slice of the page data (or 0 if the page is fully committed already) + mi_memid_t memid; // const: provenance of the page memory +} mi_page_t; + + +// ------------------------------------------------------ +// Object sizes +// ------------------------------------------------------ + +#define MI_PAGE_ALIGN MI_ARENA_SLICE_ALIGN // pages must be aligned on this for the page map. +#define MI_PAGE_MIN_START_BLOCK_ALIGN MI_MAX_ALIGN_SIZE // minimal block alignment for the first block in a page (16b) +#define MI_PAGE_MAX_START_BLOCK_ALIGN2 (4*MI_KiB) // maximal block alignment for "power of 2"-sized blocks (such that we guarantee natural alignment) +#define MI_PAGE_OSPAGE_BLOCK_ALIGN2 (4*MI_KiB) // also aligns any multiple of this size to avoid TLB misses. +#define MI_PAGE_MAX_OVERALLOC_ALIGN MI_ARENA_SLICE_SIZE // (64 KiB) limit for which we overallocate in arena pages, beyond this use OS allocation + +#if (MI_ENCODE_FREELIST || MI_PADDING) && MI_SIZE_SIZE == 8 +#define MI_PAGE_INFO_SIZE ((MI_INTPTR_SHIFT+2)*32) // 160 >= sizeof(mi_page_t) +#else +#define MI_PAGE_INFO_SIZE ((MI_INTPTR_SHIFT+1)*32) // 128/96 >= sizeof(mi_page_t) +#endif + +// The max object sizes are intended to not waste more than ~ 12.5% internally over the page sizes. 
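The flag bits carried in the low bits of `xthread_id` (described above) follow a conventional tagging scheme. The toy program below is not mimalloc code; it only demonstrates the packing and unpacking, using hypothetical constants that mirror `MI_PAGE_IN_FULL_QUEUE`, `MI_PAGE_HAS_INTERIOR_POINTERS`, and `MI_PAGE_FLAG_MASK`.

```
/* Toy illustration (not mimalloc code) of the tag bits kept in the low bits of
   `xthread_id`; the constants mirror the MI_PAGE_* flag defines above. */
#include <stdio.h>
#include <stddef.h>

#define PAGE_IN_FULL_QUEUE         ((size_t)0x01)
#define PAGE_HAS_INTERIOR_POINTERS ((size_t)0x02)
#define PAGE_FLAG_MASK             ((size_t)0x03)

int main(void) {
  size_t thread_id  = (size_t)0x7ffe1000;                // hypothetical id, low bits clear
  size_t xthread_id = thread_id | PAGE_IN_FULL_QUEUE;    // tag: page sits in the full queue
  printf("thread id : 0x%zx\n", xthread_id & ~PAGE_FLAG_MASK);
  printf("full queue: %d\n", (int)((xthread_id & PAGE_IN_FULL_QUEUE) != 0));
  printf("interior  : %d\n", (int)((xthread_id & PAGE_HAS_INTERIOR_POINTERS) != 0));
  return 0;
}
```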
+#define MI_SMALL_MAX_OBJ_SIZE ((MI_SMALL_PAGE_SIZE-MI_PAGE_OSPAGE_BLOCK_ALIGN2)/6) // = 10 KiB +#if MI_ENABLE_LARGE_PAGES +#define MI_MEDIUM_MAX_OBJ_SIZE ((MI_MEDIUM_PAGE_SIZE-MI_PAGE_OSPAGE_BLOCK_ALIGN2)/6) // ~ 84 KiB +#define MI_LARGE_MAX_OBJ_SIZE (MI_LARGE_PAGE_SIZE/8) // <= 512 KiB // note: this must be a nice power of 2 or we get rounding issues with `_mi_bin` +#else +#define MI_MEDIUM_MAX_OBJ_SIZE (MI_MEDIUM_PAGE_SIZE/8) // <= 64 KiB +#define MI_LARGE_MAX_OBJ_SIZE MI_MEDIUM_MAX_OBJ_SIZE // note: this must be a nice power of 2 or we get rounding issues with `_mi_bin` +#endif +#define MI_LARGE_MAX_OBJ_WSIZE (MI_LARGE_MAX_OBJ_SIZE/MI_SIZE_SIZE) + +#if (MI_LARGE_MAX_OBJ_WSIZE >= 655360) +#error "mimalloc internal: define more bins" +#endif + + +// ------------------------------------------------------ +// Page kinds +// ------------------------------------------------------ + +typedef enum mi_page_kind_e { + MI_PAGE_SMALL, // small blocks go into 64KiB pages + MI_PAGE_MEDIUM, // medium blocks go into 512KiB pages + MI_PAGE_LARGE, // larger blocks go into 4MiB pages (if `MI_ENABLE_LARGE_PAGES==1`) + MI_PAGE_SINGLETON // page containing a single block. + // used for blocks `> MI_LARGE_MAX_OBJ_SIZE` or an aligment `> MI_PAGE_MAX_OVERALLOC_ALIGN`. +} mi_page_kind_t; + + + +// ------------------------------------------------------ +// A "theap" is a thread local heap which owns pages. +// (making them thread-local avoids atomic operations) +// +// All theaps belong to a (non-thread-local) heap. +// A theap just owns a set of pages for allocation and +// can only be allocate/reallocate from the thread that created it. +// Freeing blocks can be done from any thread though. +// +// Per thread, there is always a default theap that belongs +// to the default heap. It is initialized to statically +// point initially to an empty theap to avoid initialization +// checks in the fast path. +// ------------------------------------------------------ + +// Thread local data +typedef struct mi_tld_s mi_tld_t; // defined below + +// Pages of a certain block size are held in a queue. +typedef struct mi_page_queue_s { + mi_page_t* first; + mi_page_t* last; + size_t count; + size_t block_size; +} mi_page_queue_t; + +// Random context +typedef struct mi_random_cxt_s { + uint32_t input[16]; + uint32_t output[16]; + int output_available; + bool weak; +} mi_random_ctx_t; + + +// In debug mode there is a padding structure at the end of the blocks to check for buffer overflows +#if MI_PADDING +typedef struct mi_padding_s { + uint32_t canary; // encoded block value to check validity of the padding (in case of overflow) + uint32_t delta; // padding bytes before the block. (mi_usable_size(p) - delta == exact allocated bytes) +} mi_padding_t; +#define MI_PADDING_SIZE (sizeof(mi_padding_t)) +#define MI_PADDING_WSIZE ((MI_PADDING_SIZE + MI_INTPTR_SIZE - 1) / MI_INTPTR_SIZE) +#else +#define MI_PADDING_SIZE 0 +#define MI_PADDING_WSIZE 0 +#endif + +#define MI_PAGES_DIRECT (MI_SMALL_WSIZE_MAX + MI_PADDING_WSIZE + 1) + + +// A thread-local heap ("theap") owns a set of thread-local pages. +struct mi_theap_s { + mi_tld_t* tld; // thread-local data + mi_heap_t* heap; // the heap this theap belongs to. + unsigned long long heartbeat; // monotonic heartbeat count + uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) + mi_random_ctx_t random; // random number context used for secure allocation + size_t page_count; // total number of pages in the `pages` queues. 
+ size_t page_retired_min; // smallest retired index (retired pages are fully free, but still in the page queues) + size_t page_retired_max; // largest retired index into the `pages` array. + long generic_count; // how often is `_mi_malloc_generic` called? + long generic_collect_count; // how often is `_mi_malloc_generic` called without collecting? + + mi_theap_t* tnext; // list of theaps in this thread + mi_theap_t* tprev; + mi_theap_t* hnext; // list of theaps of the owning `heap` + mi_theap_t* hprev; + + long page_full_retain; // how many full pages can be retained per queue (before abandoning them) + bool allow_page_reclaim; // `true` if this theap should not reclaim abandoned pages + bool allow_page_abandon; // `true` if this theap can abandon pages to reduce memory footprint + #if MI_GUARDED + size_t guarded_size_min; // minimal size for guarded objects + size_t guarded_size_max; // maximal size for guarded objects + size_t guarded_sample_rate; // sample rate (set to 0 to disable guarded pages) + size_t guarded_sample_count; // current sample count (counting down to 0) + #endif + mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size. + mi_page_queue_t pages[MI_BIN_COUNT]; // queue of pages for each size class (or "bin") + mi_memid_t memid; // provenance of the theap struct itself (meta or os) + mi_stats_t stats; // thread-local statistics +}; + + + + +// ------------------------------------------------------ +// Heaps contain allocated blocks. Heaps are self-contained +// but share the (sub-process) memory in the arena's. +// ------------------------------------------------------ + +// Keep track of all owned and abandoned pages in the arena's +struct mi_arena_pages_s; +typedef struct mi_arena_pages_s mi_arena_pages_t; + +#define MI_MAX_ARENAS (160) // Limited for now (and takes up .bss).. 
but arena's scale up exponentially (see `mi_arena_reserve`) + // 160 arenas is enough for ~2 TiB memory + +// A dynamic thread-local variable; 0 for an invalid thread-local +typedef size_t mi_thread_local_t; + +typedef struct mi_heap_s { + mi_subproc_t* subproc; // a heap belongs to a subprocess + size_t heap_seq; // unique sequence number for heaps in this subprocess + mi_heap_t* next; // list of heaps in this subprocess + mi_heap_t* prev; + mi_thread_local_t theap; // dynamic thread local for the thread-local theaps of this heap + + mi_arena_t* exclusive_arena; // if the heap should only allocate from a specific arena (or NULL) + int numa_node; // if >=0, prefer this numa node for allocations + + mi_theap_t* theaps; // list of all thread-local theaps belonging to this heap (using the `hnext`/`hprev` fields) + mi_lock_t theaps_lock; // lock for the theaps list operations + + _Atomic(size_t) abandoned_count[MI_BIN_COUNT]; // total count of abandoned pages in this heap + mi_page_t* os_abandoned_pages; // list of pages that are OS allocated and not in an arena + mi_lock_t os_abandoned_pages_lock; // lock for the os abandoned pages list (this lock protects list operations) + + _Atomic(mi_arena_pages_t*) arena_pages[MI_MAX_ARENAS]; // track owned and abandoned pages in the arenas (entries can be NULL) + mi_lock_t arena_pages_lock; // lock to update the arena_pages array + + mi_stats_t stats; // statistics for this heap; periodically updated by merging from each theap +} mi_heap_t; + + +// ------------------------------------------------------ +// Sub processes do not reclaim or visit pages from other sub processes. +// These are essentially the static variables of a process, and +// usually there is only one subprocess. This can be used for example +// by CPython to have separate interpreters within one process. +// Each thread can only belong to one subprocess +// (and needs to call `mi_subproc_add_current_thread` before any allocations). 
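A sketch of how a program might use sub-processes follows, assuming the `mi_subproc_new`/`mi_subproc_add_current_thread`/`mi_subproc_delete` functions declared in `mimalloc.h` (the second of these is named in the comment above); the exact signatures should be checked against the vendored header as they can differ between releases.

```
/* Hedged sketch: giving a worker thread its own sub-process. Assumes the
   mi_subproc_new / mi_subproc_add_current_thread / mi_subproc_delete functions
   from <mimalloc.h>; verify the exact signatures in the vendored header. */
#include <mimalloc.h>
#include <pthread.h>
#include <stddef.h>

static void* worker(void* arg) {
  mi_subproc_id_t subproc = (mi_subproc_id_t)arg;
  mi_subproc_add_current_thread(subproc);   // must run before any allocation in this thread
  void* p = mi_malloc(1024);                // served from memory owned by `subproc`
  mi_free(p);
  return NULL;
}

int main(void) {
  mi_subproc_id_t subproc = mi_subproc_new();
  pthread_t t;
  pthread_create(&t, NULL, worker, subproc);
  pthread_join(t, NULL);
  mi_subproc_delete(subproc);               // only after all its threads have exited
  return 0;
}
```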
+// ------------------------------------------------------ + +struct mi_subproc_s { + size_t subproc_seq; // unique id for sub-processes + mi_subproc_t* next; // list of all sub-processes + mi_subproc_t* prev; + + _Atomic(size_t) arena_count; // current count of arena's + _Atomic(mi_arena_t*) arenas[MI_MAX_ARENAS]; // arena's of this sub-process + mi_lock_t arena_reserve_lock; // lock to ensure arena's get reserved one at a time + mi_decl_align(8) // needed on some 32-bit platforms + _Atomic(int64_t) purge_expire; // expiration is set if any arenas can be purged + + _Atomic(mi_heap_t*) heap_main; // main heap for this sub process + mi_heap_t* heaps; // heaps belonging to this sub-process + mi_lock_t heaps_lock; + + _Atomic(size_t) thread_count; // current threads associated with this sub-process + _Atomic(size_t) thread_total_count; // total created threads associated with this sub-process + _Atomic(size_t) heap_count; // current heaps in this sub-process (== |heaps|) + _Atomic(size_t) heap_total_count; // total created heaps in this sub-process + + mi_memid_t memid; // provenance of this memory block (meta or static) + mi_decl_align(8) // needed on some 32-bit platforms + mi_stats_t stats; // subprocess statistics; updated for arena/OS stats like committed, + // and otherwise merged with heap stats when those are deleted +}; + + +// ------------------------------------------------------ +// Thread Local data +// ------------------------------------------------------ + +// Milliseconds as in `int64_t` to avoid overflows +typedef int64_t mi_msecs_t; + +// Thread local data +struct mi_tld_s { + mi_threadid_t thread_id; // thread id of this thread + size_t thread_seq; // thread sequence id (linear count of created threads) + int numa_node; // thread preferred numa node + mi_subproc_t* subproc; // sub-process this thread belongs to. + mi_theap_t* theaps; // list of theaps in this thread (so we can abandon all when the thread terminates) + bool recurse; // true if deferred was called; used to prevent infinite recursion. + bool is_in_threadpool; // true if this thread is part of a threadpool (and can run arbitrary tasks) + mi_memid_t memid; // provenance of the tld memory itself (meta or OS) +}; + + +/* ---------------------------------------------------------------------------- + Arenas are fixed area's of OS memory from which we can allocate + large blocks (>= MI_ARENA_MIN_BLOCK_SIZE). + In contrast to the rest of mimalloc, the arenas are shared between + threads and need to be accessed using atomic operations (using atomic `mi_bitmap_t`'s). + + Arenas are also used to for huge OS page (1GiB) reservations or for reserving + OS memory upfront which can be improve performance or is sometimes needed + on embedded devices. We can also employ this with WASI or `sbrk` systems + to reserve large arenas upfront and be able to reuse the memory more effectively. 
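For the up-front reservation mentioned above, the public API in `mimalloc.h` offers `mi_reserve_os_memory` (and `mi_manage_os_memory` for externally provided memory). The sketch below assumes the `int mi_reserve_os_memory(size_t size, bool commit, bool allow_large)` signature returning 0 on success; verify against the vendored header.

```
/* Hedged sketch: reserving a large arena up front, as suggested above for WASI,
   `sbrk`, or embedded targets. */
#include <mimalloc.h>
#include <stdbool.h>
#include <stdio.h>

int main(void) {
  // Reserve a 256 MiB arena at startup: not committed yet, large OS pages allowed.
  int err = mi_reserve_os_memory((size_t)256 * 1024 * 1024, /*commit*/ false, /*allow_large*/ true);
  if (err != 0) {
    fprintf(stderr, "arena reservation failed: %d\n", err);
    return 1;
  }
  void* p = mi_malloc(1024 * 1024);   // later allocations draw from the reserved arena when possible
  mi_free(p);
  return 0;
}
```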
+-----------------------------------------------------------------------------*/ + +#define MI_ARENA_BIN_COUNT (MI_BIN_COUNT) +#define MI_ARENA_MIN_SIZE (MI_BCHUNK_BITS * MI_ARENA_SLICE_SIZE) // 32 MiB (or 8 MiB on 32-bit) +#define MI_ARENA_MAX_SIZE (MI_BITMAP_MAX_BIT_COUNT * MI_ARENA_SLICE_SIZE) + +typedef struct mi_bitmap_s mi_bitmap_t; // atomic bitmap (defined in `src/bitmap.h`) +typedef struct mi_bbitmap_s mi_bbitmap_t; // atomic binned bitmap (defined in `src/bitmap.h`) + +typedef struct mi_arena_pages_s { + mi_bitmap_t* pages; // all registered pages (abandoned and owned) + mi_bitmap_t* pages_abandoned[MI_ARENA_BIN_COUNT]; // abandoned pages per size bin (a set bit means the start of the page) + // followed by the bitmaps (whose sizes depend on the arena size) +} mi_arena_pages_t; + + +// A memory arena +typedef struct mi_arena_s { + mi_memid_t memid; // provenance of the memory area + mi_subproc_t* subproc; // subprocess this arena belongs to (`this 'element-of' this->subproc->arenas`) + size_t arena_idx; // index in the arenas array + + size_t slice_count; // total size of the area in arena slices (of `MI_ARENA_SLICE_SIZE`) + size_t info_slices; // initial slices reserved for the arena bitmaps + int numa_node; // associated NUMA node + bool is_exclusive; // only allow allocations if specifically for this arena + mi_decl_align(8) // needed on some 32-bit platforms + _Atomic(mi_msecs_t) purge_expire; // expiration time when slices can be purged from `slices_purge`. + mi_commit_fun_t* commit_fun; // custom commit/decommit memory + void* commit_fun_arg; // user argument for a custom commit function + + mi_bbitmap_t* slices_free; // is the slice free? (a binned bitmap with size classes) + mi_bitmap_t* slices_committed; // is the slice committed? (i.e. accessible) + mi_bitmap_t* slices_dirty; // is the slice potentially non-zero? + mi_bitmap_t* slices_purge; // slices that can be purged + mi_arena_pages_t pages_main; // arena page bitmaps for the main heap are allocated up front as well + + // followed by the bitmaps (whose sizes depend on the arena size) + // note: when adding bitmaps revise `mi_arena_info_slices_needed` +} mi_arena_t; + + + +/* ----------------------------------------------------------- + Error codes passed to `_mi_fatal_error` + All are recoverable but EFAULT is a serious error and aborts by default in secure mode. 
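These error codes (defined concretely just below) are the values mimalloc passes to a handler registered with the public `mi_register_error` hook; a minimal sketch:

```
/* Minimal sketch: observing these error codes through the public
   mi_register_error hook from <mimalloc.h> (e.g. EAGAIN for a double free,
   EFAULT for corrupted meta-data, ENOMEM, EINVAL, EOVERFLOW). */
#include <mimalloc.h>
#include <stdio.h>

static void report_error(int err, void* arg) {
  (void)arg;
  fprintf(stderr, "mimalloc reported error code %d\n", err);
}

int main(void) {
  mi_register_error(&report_error, NULL);
  void* p = mi_malloc(64);
  mi_free(p);     // a bug such as a double free would invoke report_error
  return 0;
}
```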
+ For portability define undefined error codes using common Unix codes: + +----------------------------------------------------------- */ + +#ifndef EAGAIN // double free +#define EAGAIN (11) +#endif +#ifndef ENOMEM // out of memory +#define ENOMEM (12) +#endif +#ifndef EFAULT // corrupted free-list or meta-data +#define EFAULT (14) +#endif +#ifndef EINVAL // trying to free an invalid pointer +#define EINVAL (22) +#endif +#ifndef EOVERFLOW // count*size overflow +#define EOVERFLOW (75) +#endif + +/* ----------------------------------------------------------- + Debug constants +----------------------------------------------------------- */ + +#if !defined(MI_DEBUG_UNINIT) +#define MI_DEBUG_UNINIT (0xD0) +#endif +#if !defined(MI_DEBUG_FREED) +#define MI_DEBUG_FREED (0xDF) +#endif +#if !defined(MI_DEBUG_PADDING) +#define MI_DEBUG_PADDING (0xDE) +#endif + + +#endif // MI_TYPES_H diff --git a/3rd/mimalloc-2.0.9/mimalloc.pc.in b/3rd/mimalloc-3.2.6/mimalloc.pc.in similarity index 64% rename from 3rd/mimalloc-2.0.9/mimalloc.pc.in rename to 3rd/mimalloc-3.2.6/mimalloc.pc.in index 36da2038..80922256 100644 --- a/3rd/mimalloc-2.0.9/mimalloc.pc.in +++ b/3rd/mimalloc-3.2.6/mimalloc.pc.in @@ -1,11 +1,11 @@ prefix=@CMAKE_INSTALL_PREFIX@ -libdir=@libdir_for_pc_file@ -includedir=@includedir_for_pc_file@ +libdir=@mi_pc_libdir@ +includedir=@mi_pc_includedir@ Name: @PROJECT_NAME@ Description: A compact general purpose allocator with excellent performance Version: @PACKAGE_VERSION@ URL: https://github.com/microsoft/mimalloc/ -Libs: -L${libdir} -lmimalloc -Libs.private: @pc_libraries@ +Libs: -L${libdir} -l@mi_libname@ +Libs.private: @mi_pc_libraries@ Cflags: -I${includedir} diff --git a/3rd/mimalloc-2.0.9/readme.md b/3rd/mimalloc-3.2.6/readme.md similarity index 65% rename from 3rd/mimalloc-2.0.9/readme.md rename to 3rd/mimalloc-3.2.6/readme.md index 364b974b..4a03ca4a 100644 --- a/3rd/mimalloc-2.0.9/readme.md +++ b/3rd/mimalloc-3.2.6/readme.md @@ -1,7 +1,7 @@ -[](https://dev.azure.com/Daan0324/mimalloc/_build?definitionId=1&_a=summary) +[](https://dev.azure.com/Daan0324/mimalloc/_build?definitionId=1&_a=summary) # mimalloc @@ -9,26 +9,30 @@ mimalloc (pronounced "me-malloc") is a general purpose allocator with excellent [performance](#performance) characteristics. -Initially developed by Daan Leijen for the run-time systems of the +Initially developed by Daan Leijen for the runtime systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release tag: `v2.0.9` (2022-12-23). -Latest stable tag: `v1.7.9` (2022-12-23). +Latest release : `v3.2.6` (2026-01-08) release candidate 1, please report any issues. +Latest v2 release: `v2.2.6` (2026-01-08). +Latest v1 release: `v1.9.6` (2026-01-08). mimalloc is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: ``` > LD_PRELOAD=/usr/lib/libmimalloc.so myprogram ``` -It also has an easy way to override the default allocator in [Windows](#override_on_windows). Notable aspects of the design include: +It also includes a way to dynamically override the default allocator in [Windows](#override_on_windows). +Notable aspects of the design include: -- __small and consistent__: the library is about 8k LOC using simple and +- __small and consistent__: the library is about 10k LOC using simple and consistent data structures. This makes it very suitable to integrate and adapt in other projects. 
For runtime systems it provides hooks for a monotonic _heartbeat_ and deferred freeing (for bounded worst-case times with reference counting). Partly due to its simplicity, mimalloc has been ported to many systems (Windows, macOS, Linux, WASM, various BSD's, Haiku, MUSL, etc) and has excellent support for dynamic overriding. + At the same time, it is an industrial strength allocator that runs (very) large scale + distributed services on thousands of machines with excellent worst case latencies. - __free list sharding__: instead of one big free list (per size class) we have many smaller lists per "mimalloc page" which reduces fragmentation and increases locality -- @@ -43,7 +47,7 @@ It also has an easy way to override the default allocator in [Windows](#override and the chance of contending on a single location will be low -- this is quite similar to randomized algorithms like skip lists where adding a random oracle removes the need for a more complex algorithm. -- __eager page reset__: when a "page" becomes empty (with increased chance +- __eager page purging__: when a "page" becomes empty (with increased chance due to free list sharding) the memory is marked to the OS as unused (reset or decommitted) reducing (real) memory pressure and fragmentation, especially in long running programs. @@ -53,6 +57,7 @@ It also has an easy way to override the default allocator in [Windows](#override over our benchmarks. - __first-class heaps__: efficiently create and use multiple heaps to allocate across different regions. A heap can be destroyed at once instead of deallocating each object separately. + New: v3 has true first-class heaps where one can allocate in a heap from any thread. - __bounded__: it does not suffer from _blowup_ \[1\], has bounded worst-case allocation times (_wcat_) (upto OS primitives), bounded space overhead (~0.2% meta-data, with low internal fragmentation), and has no internal points of contention using only atomic operations. @@ -68,24 +73,75 @@ Enjoy! ### Branches -* `master`: latest stable release (based on `dev-slice`). -* `dev`: development branch for mimalloc v1. Use this branch for submitting PR's. -* `dev-slice`: development branch for mimalloc v2. This branch is downstream of `dev`. +* `main`: latest stable release (still based on `dev2`). +* `dev`: development branch for mimalloc v1. Use this branch for submitting PR's. +* `dev2`: development branch for mimalloc v2. This branch is downstream of `dev` + (and is essentially equal to `dev` except for `src/segment.c`). Uses larger sliced segments to manage + mimalloc pages that can reduce fragmentation. +* `dev3`: development branch for mimalloc v3 rc1. This branch is downstream of `dev`. This version + simplifies the lock-free ownership of previous versions, and improves sharing of memory between + threads. On certain large workloads this version may use (much) less memory. + Also support true first-class heaps and more efficient heap-walking. ### Releases -Note: the `v2.x` version has a new algorithm for managing internal mimalloc pages that tends to use reduce memory usage - and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance - (see [below](#performance)); please report if you observe any significant performance regression. - -* 2022-12-23, `v1.7.9`, `v2.0.9`: Supports building with asan and improved [Valgrind] support. Support abitrary large - alignments (in particular for `std::pmr` pools). 
- Added C++ STL allocators attached to a specific heap (thanks @vmarkovtsev). - Heap walks now visit all object (including huge objects). Support Windows nano server containers (by Johannes Schindelin,@dscho). +* 2026-01-08, `v1.9.6`, `v2.2.6`, `v3.2.6` (rc1) : Important bug fixes. Many improvements to v3 including + true first-class heaps where one can allocate in heap from any thread, and track statistics per heap as well. + Added `MIMALLOC_ALLOW_THP` option. This is by default enabled except on Android. When THP is detected on v3, + mimalloc will set the `MIMALLOC_MINIMAL_PURGE_SIZE` to 2MiB to avoid breaking up potential THP huge pages. + v3 uses faster TLS access on Windows, and has improved performance for `mi_calloc` and aligned allocations. + Fixed rare race condition on older v3, fixed potential buffer overflow in debug statistics, add API for returning + allocated sizes on allocation and free. +* 2025-06-09, `v1.9.4`, `v2.2.4`, `v3.1.4` (beta) : Some important bug fixes, including a case where OS memory + was not always fully released. Improved v3 performance, build on XBox, fix build on Android, support interpose + for older macOS versions, use MADV_FREE_REUSABLE on macOS, always check commit success, better support for Windows + fixed TLS offset, etc. +* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta) : Various small bug and build fixes, including: + fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts, + fix execution on non BMI1 x64 systems. +* 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes. + Add `mi_options_print`, `mi_arenas_print`, and the experimental `mi_stat_get` and `mi_stat_get_json`. + Add `mi_thread_set_in_threadpool` and `mi_heap_set_numa_affinity` (v3 only). Add vcpkg portfile. + Upgrade mimalloc-redirect to v1.3.2. `MI_OPT_ARCH` is off by default now but still assumes armv8.1-a on arm64 + for fast atomic operations. Add QNX support. +* 2025-01-03, `v1.8.9`, `v2.1.9`, `v3.0.1-alpha`: Interim release. Support Windows arm64. New [guarded](#guarded) build that can place OS + guard pages behind objects to catch buffer overflows as they occur. + Many small fixes: build on Windows arm64, cygwin, riscV, and dragonfly; fix Windows static library initialization to account for + thread local destructors (in Rust/C++); macOS tag change; macOS TLS slot fix; improve stats; + consistent `mimalloc.dll` on Windows (instead of `mimalloc-override.dll`); fix mimalloc-redirect on Win11 H2; + add 0-byte to canary; upstream CPython fixes; reduce .bss size; allow fixed TLS slot on Windows for improved performance. +* 2024-05-21, `v1.8.7`, `v2.1.7`: Fix build issues on less common platforms. Started upstreaming patches + from the CPython [integration](https://github.com/python/cpython/issues/113141#issuecomment-2119255217). Upstream `vcpkg` patches. +* 2024-05-13, `v1.8.6`, `v2.1.6`: Fix build errors on various (older) platforms. Refactored aligned allocation. +* 2024-04-22, `v1.8.4`, `v2.1.4`: Fixes various bugs and build issues. Add `MI_LIBC_MUSL` cmake flag for musl builds. + Free-ing code is refactored into a separate module (`free.c`). Mimalloc page info is simplified with the block size + directly available (and new `block_size_shift` to improve aligned block free-ing). + New approach to collection of abandoned segments: When + a thread terminates the segments it owns are abandoned (containing still live objects) and these can be + reclaimed by other threads. 
We no longer use a list of abandoned segments but this is now done using bitmaps in arena's + which is more concurrent (and more aggressive). Abandoned memory can now also be reclaimed if a thread frees an object in + an abandoned page (which can be disabled using `mi_option_abandoned_reclaim_on_free`). The option `mi_option_max_segment_reclaim` + gives a maximum percentage of abandoned segments that can be reclaimed per try (=10%). + +* 2023-04-24, `v1.8.2`, `v2.1.2`: Fixes build issues on freeBSD, musl, and C17 (UE 5.1.1). Reduce code size/complexity + by removing regions and segment-cache's and only use arenas with improved memory purging -- this may improve memory + usage as well for larger services. Renamed options for consistency. Improved Valgrind and ASAN checking. + +* 2023-04-03, `v1.8.1`, `v2.1.1`: Fixes build issues on some platforms. + +* 2023-03-29, `v1.8.0`, `v2.1.0`: Improved support dynamic overriding on Windows 11. Improved tracing precision + with [asan](#asan) and [Valgrind](#valgrind), and added Windows event tracing [ETW](#ETW) (contributed by Xinglong He). Created an OS + abstraction layer to make it easier to port and separate platform dependent code (in `src/prim`). Fixed C++ STL compilation on older Microsoft C++ compilers, and various small bug fixes. + +* 2022-12-23, `v1.7.9`, `v2.0.9`: Supports building with [asan](#asan) and improved [Valgrind](#valgrind) support. + Support arbitrary large alignments (in particular for `std::pmr` pools). + Added C++ STL allocators attached to a specific heap (thanks @vmarkovtsev). + Heap walks now visit all object (including huge objects). Support Windows nano server containers (by Johannes Schindelin,@dscho). Various small bug fixes. -* 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind] for leak testing and heap block overflow detection. Initial - support for attaching heaps to a speficic memory area (only in v2). Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`, . +* 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind](#valgrind) for leak testing and heap block overflow + detection. Initial + support for attaching heaps to a specific memory area (only in v2). Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`, . * 2022-04-14, `v1.7.6`, `v2.0.6`: fix fallback path for aligned OS allocation on Windows, improve Windows aligned allocation even when compiling with older SDK's, fix dynamic overriding on macOS Monterey, fix MSVC C++ dynamic overriding, fix @@ -98,20 +154,6 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page improved wasm support, faster aligned allocation, various small fixes. -* 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including - M1), improved performance for v2 for large objects, Python integration improvements, more standard - installation directories, various small fixes. 
- -* 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix - thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes. - -* 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, initial M1 support (still experimental). - -* 2021-01-31, `v2.0.0`: beta release 2.0: new slice algorithm for managing internal mimalloc pages. - -* 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics, - improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes. - * [Older release notes](#older-release-notes) Special thanks to: @@ -145,14 +187,14 @@ mimalloc is used in various large scale low-latency services and programs, for e ## Windows -Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build. -The `mimalloc` project builds a static library (in `out/msvc-x64`), while the -`mimalloc-override` project builds a DLL for overriding malloc +Open `ide/vs2022/mimalloc.sln` in Visual Studio 2022 and build. +The `mimalloc-lib` project builds a static library (in `out/msvc-x64`), while the +`mimalloc-override-dll` project builds a DLL for overriding malloc in the entire program. -## macOS, Linux, BSD, etc. +## Linux, macOS, BSD, etc. -We use [`cmake`](https://cmake.org)1 as the build system: +We use [`cmake`](https://cmake.org) as the build system: ``` > mkdir -p out/release @@ -175,32 +217,58 @@ maintains detailed statistics as: > cmake -DCMAKE_BUILD_TYPE=Debug ../.. > make ``` + This will name the shared library as `libmimalloc-debug.so`. -Finally, you can build a _secure_ version that uses guard pages, encrypted -free lists, etc., as: +Finally, you can build a _secure_ version that uses guard pages, encrypted free lists, etc., as: + ``` > mkdir -p out/secure > cd out/secure > cmake -DMI_SECURE=ON ../.. > make ``` + This will name the shared library as `libmimalloc-secure.so`. -Use `ccmake`2 instead of `cmake` -to see and customize all the available build options. +Use `cmake ../.. -LH` to see all the available build options. + +The examples use the default compiler. If you like to use another, use: + +``` +> CC=clang CXX=clang++ cmake ../.. +``` + +## Cmake with Visual Studio + +You can also use cmake on Windows. Open a Visual Studio 2022 development prompt +and invoke `cmake` with the right [generator](https://cmake.org/cmake/help/latest/generator/Visual%20Studio%2017%202022.html) +and architecture, like: + +``` +> cmake ..\.. -G "Visual Studio 17 2022" -A x64 -DMI_OVERRIDE=ON +``` + +The cmake build type is specified when actually building, for example: + +``` +> cmake --build . --config=Release +``` -Notes: -1. Install CMake: `sudo apt-get install cmake` -2. Install CCMake: `sudo apt-get install cmake-curses-gui` +You can also install the [LLVM toolset](https://learn.microsoft.com/en-us/cpp/build/clang-support-msbuild?view=msvc-170#install-1) +on Windows to build with the `clang-cl` compiler directly: + +``` +> cmake ../.. -G "Visual Studio 17 2022" -T ClangCl +``` -## Single source +## Single Source You can also directly build the single `src/static.c` file as part of your project without needing `cmake` at all. Make sure to also add the mimalloc `include` directory to the include path. 
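A minimal program for this single-source setup might look as follows; the include and source paths are placeholders for wherever the vendored tree lives (in this repository, `3rd/mimalloc-3.2.6`).

```
/* Minimal program for the single-source build described above. Compile as, e.g.:
   cc demo.c 3rd/mimalloc-3.2.6/src/static.c -I 3rd/mimalloc-3.2.6/include -o demo */
#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  printf("mimalloc version: %d\n", mi_version());
  char* buf = (char*)mi_zalloc(32);   // zero-initialized allocation
  mi_free(buf);
  return 0;
}
```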
-# Using the library +# Using the Library The preferred usage is including ``, linking with the shared- or static library, and using the `mi_malloc` API exclusively for allocation. For example, @@ -212,7 +280,7 @@ mimalloc uses only safe OS calls (`mmap` and `VirtualAlloc`) and can co-exist with other allocators linked to the same program. If you use `cmake`, you can simply use: ``` -find_package(mimalloc 1.4 REQUIRED) +find_package(mimalloc 1.8 REQUIRED) ``` in your `CMakeLists.txt` to find a locally installed mimalloc. Then use either: ``` @@ -225,8 +293,8 @@ target_link_libraries(myapp PUBLIC mimalloc-static) to link with the static library. See `test\CMakeLists.txt` for an example. For best performance in C++ programs, it is also recommended to override the -global `new` and `delete` operators. For convience, mimalloc provides -[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project. +global `new` and `delete` operators. For convenience, mimalloc provides +[`mimalloc-new-delete.h`](include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project. In C++, mimalloc also provides the `mi_stl_allocator` struct which implements the `std::allocator` interface. @@ -273,43 +341,54 @@ completely and redirect all calls to the _mimalloc_ library instead . ## Environment Options -You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)), -or via environment variables: +You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)), or via environment variables: - `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates. -- `MIMALLOC_VERBOSE=1`: show verbose messages. +- `MIMALLOC_VERBOSE=1`: show verbose messages (including statistics). - `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages. -- `MIMALLOC_PAGE_RESET=0`: by default, mimalloc will reset (or purge) OS pages that are not in use, to signal to the OS - that the underlying physical memory can be reused. This can reduce memory fragmentation in long running (server) - programs. By setting it to `0` this will no longer be done which can improve performance for batch-like programs. - As an alternative, the `MIMALLOC_RESET_DELAY=` can be set higher (100ms by default) to make the page - reset occur less frequently instead of turning it off completely. + +Advanced options: + +- `MIMALLOC_ARENA_EAGER_COMMIT=2`: turns on eager commit for the large arenas (usually 1GiB) from which mimalloc + allocates segments and pages. Set this to 2 (default) to + only enable this on overcommit systems (e.g. Linux). Set this to 1 to enable explicitly on other systems + as well (like Windows or macOS) which may improve performance (as the whole arena is committed at once). + Note that eager commit only increases the commit but not the actual the peak resident set + (rss) so it is generally ok to enable this. +- `MIMALLOC_PURGE_DELAY=N`: the delay in `N` milli-seconds (by default `1000` in v3) after which mimalloc will purge + OS pages that are not in use. This signals to the OS that the underlying physical memory can be reused which + can reduce memory fragmentation especially in long running (server) programs. 
Setting `N` to `0` purges immediately when + a page becomes unused which can improve memory usage but also decreases performance. + Setting it to `-1` disables purging completely. +- `MIMALLOC_PURGE_DECOMMITS=1`: By default "purging" memory means unused memory is decommitted (`MEM_DECOMMIT` on Windows, + `MADV_DONTNEED` (which decresease rss immediately) on `mmap` systems). Set this to 0 to instead "reset" unused + memory on a purge (`MEM_RESET` on Windows, generally `MADV_FREE` (which does not decrease rss immediately) on `mmap` systems). + Mimalloc generally does not "free" OS memory but only "purges" OS memory, in other words, it tries to keep virtual + address ranges and decommits within those ranges (to make the underlying physical memory available to other processes). + +Further options for large workloads and services: + +- `MIMALLOC_ALLOW_THP=1`: By default always allow transparent huge pages (THP) on Linux systems. On Android only this is + by default off. When set to `0`, THP is disabled for the process that mimalloc runs in. If enabled, mimalloc also sets + the `MIMALLOC_MINIMAL_PURGE_SIZE` in v3 to 2MiB to avoid potentially breaking up transparent huge pages. - `MIMALLOC_USE_NUMA_NODES=N`: pretend there are at most `N` NUMA nodes. If not set, the actual NUMA nodes are detected at runtime. Setting `N` to 1 may avoid problems in some virtual environments. Also, setting it to a lower number than the actual NUMA nodes is fine and will only cause threads to potentially allocate more memory across actual NUMA nodes (but this can happen in any case as NUMA local allocation is always a best effort but not guaranteed). -- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly - improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs - to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes +- `MIMALLOC_ALLOW_LARGE_OS_PAGES=0`: Set to 1 to use large OS pages (2 or 4MiB) when available; for some workloads this can + significantly improve performance. However, large OS pages cannot be purged or shared with other processes so may lead + to increased memory usage in some cases. + Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs + to explicitly give permissions for large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible). - -- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB _huge_ OS pages. This reserves the huge pages at +- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where `N` is the number of 1GiB _huge_ OS pages. This reserves the huge pages at startup and sometimes this can give a large (latency) performance improvement on big workloads. - Usually it is better to not use - `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving + Usually it is better to not use `MIMALLOC_ALLOW_LARGE_OS_PAGES=1` in combination with this setting. Just like large + OS pages, use with care as reserving contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at startup only once). 
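The environment variables above (and the remaining notes that follow) map to `mi_option_t` entries that can also be set programmatically, ideally before the first allocation. The option names below are assumed from `mimalloc.h` and the exact set varies per release, so treat this as a sketch:

```
/* Hedged sketch: option names are assumed from <mimalloc.h> and vary per release;
   set options before the first allocation where possible. */
#include <mimalloc.h>

int main(void) {
  mi_option_set(mi_option_show_stats, 1);            // MIMALLOC_SHOW_STATS=1
  mi_option_set(mi_option_purge_delay, 250);         // MIMALLOC_PURGE_DELAY=250 (milliseconds)
  mi_option_set(mi_option_reserve_huge_os_pages, 1); // MIMALLOC_RESERVE_HUGE_OS_PAGES=1
  void* p = mi_malloc(4096);
  mi_free(p);
  return 0;   // statistics print at exit when show_stats is enabled
}
```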
- Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])). - With huge OS pages, it may be beneficial to set the setting - `MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB) - of a thread to not allocate in the huge OS pages; this prevents threads that are short lived - and allocate just a little to take up space in the huge OS page area (which cannot be reset). + Note that we usually need to explicitly give permission for huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])). The huge pages are usually allocated evenly among NUMA nodes. We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all the huge pages at a specific numa node instead. @@ -340,50 +419,38 @@ As always, evaluate with care as part of an overall security strategy as all of ## Debug Mode -When _mimalloc_ is built using debug mode, various checks are done at runtime to catch development errors. +When _mimalloc_ is built using debug mode, (`-DCMAKE_BUILD_TYPE=Debug`), +various checks are done at runtime to catch development errors. - Statistics are maintained in detail for each object size. They can be shown using `MIMALLOC_SHOW_STATS=1` at runtime. - All objects have padding at the end to detect (byte precise) heap block overflows. - Double free's, and freeing invalid heap pointers are detected. - Corrupted free-lists and some forms of use-after-free are detected. -## Valgrind +## Guarded Mode -Generally, we recommend using the standard allocator with the amazing [Valgrind] tool (and -also for other address sanitizers). -However, it is possible to build mimalloc with Valgrind support. This has a small performance -overhead but does allow detecting memory leaks and byte-precise buffer overflows directly on final -executables. To build with valgrind support, use the `MI_VALGRIND=ON` cmake option: +_mimalloc_ can be build in guarded mode using the `-DMI_GUARDED=ON` flags in `cmake`. +This enables placing OS guard pages behind certain object allocations to catch buffer overflows as they occur. +This can be invaluable to catch buffer-overflow bugs in large programs. However, it also means that any object +allocated with a guard page takes at least 8 KiB memory for the guard page and its alignment. As such, allocating +a guard page for every allocation may be too expensive both in terms of memory, and in terms of performance with +many system calls. Therefore, there are various environment variables (and options) to tune this: -``` -> cmake ../.. -DMI_VALGRIND=ON -``` +- `MIMALLOC_GUARDED_SAMPLE_RATE=N`: Set the sample rate to `N` (by default 4000). This mode places a guard page + behind every `N` suitable object allocations (per thread). Since the performance in guarded mode without placing + guard pages is close to release mode, this can be used to enable guard pages even in production to catch latent + buffer overflow bugs. Set the sample rate to `1` to guard every object, and to `0` to place no guard pages at all. -This can also be combined with secure mode or debug mode. -You can then run your programs directly under valgrind: +- `MIMALLOC_GUARDED_SAMPLE_SEED=N`: Start sampling at `N` (by default random). Can be used to reproduce a buffer + overflow if needed. -``` -> valgrind -``` +- `MIMALLOC_GUARDED_MIN=N`, `MIMALLOC_GUARDED_MAX=N`: Minimal and maximal _rounded_ object sizes for which a guard + page is considered (`0` and `1GiB` respectively). 
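The guarded-mode knobs can likewise be set from code. The sketch below assumes a build configured with `-DMI_GUARDED=ON` and the `mi_option_guarded_*` entries from `mimalloc.h` (v3); the option names are an assumption and may differ in other releases. The remaining guarded options continue below.

```
/* Hedged sketch: assumes -DMI_GUARDED=ON at build time and the mi_option_guarded_*
   entries from <mimalloc.h> (v3); the option names are an assumption. */
#include <mimalloc.h>

int main(void) {
  mi_option_set(mi_option_guarded_sample_rate, 1);   // guard every suitable allocation
  mi_option_set(mi_option_guarded_min, 148);         // only sizes rounding to 148 bytes...
  mi_option_set(mi_option_guarded_max, 148);         // ...get a guard page in this sketch
  char* p = (char*)mi_malloc(141);                   // placed right in front of an OS guard page
  // p[200] = 0;                                     // an overflow like this would fault immediately
  mi_free(p);
  return 0;
}
```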
If you suspect a buffer overflow occurs with an object of size
+  141, set the minimum and maximum to `148` and the sample rate to `1` to have all of those guarded.
-If you rely on overriding `malloc`/`free` by mimalloc (instead of using the `mi_malloc`/`mi_free` API directly),
-you also need to tell `valgrind` to not intercept those calls itself, and use:
-
-```
-> MIMALLOC_SHOW_STATS=1 valgrind --soname-synonyms=somalloc=*mimalloc* -- <myprogram>
-```
-
-By setting the `MIMALLOC_SHOW_STATS` environment variable you can check that mimalloc is indeed
-used and not the standard allocator. Even though the [Valgrind option][valgrind-soname]
-is called `--soname-synonyms`, this also
-works when overriding with a static library or object file. Unfortunately, it is not possible to
-dynamically override mimalloc using `LD_PRELOAD` together with `valgrind`.
-See also the `test/test-wrong.c` file to test with `valgrind`.
-
-Valgrind support is in its initial development -- please report any issues.
-
-[Valgrind]: https://valgrind.org/
-[valgrind-soname]: https://valgrind.org/docs/manual/manual-core.html#opt.soname-synonyms
+- `MIMALLOC_GUARDED_PRECISE=1`: If we have an object of size 13, we would usually place it at an aligned 16 bytes in
+  front of the guard page. Using `MIMALLOC_GUARDED_PRECISE` places it exactly 13 bytes before a page so that even
+  a 1 byte overflow is detected. This violates the C/C++ minimal alignment guarantees, though, so use with care.

# Overriding Standard Malloc

@@ -394,7 +461,7 @@ Overriding the standard `malloc` (and `new`) can be done either _dynamically_ or

This is the recommended way to override the standard malloc interface.

-### Override on Linux, BSD
+### Dynamic Override on Linux, BSD

On these ELF-based systems we preload the mimalloc shared
library so all calls to the standard `malloc` interface are
@@ -413,7 +480,7 @@ or run with the debug version to get detailed statistics:

> env MIMALLOC_SHOW_STATS=1 LD_PRELOAD=/usr/lib/libmimalloc-debug.so myprogram
```

-### Override on MacOS
+### Dynamic Override on macOS

On macOS we can also preload the mimalloc shared
library so all calls to the standard `malloc` interface are
@@ -426,55 +493,163 @@ Note that certain security restrictions may apply when doing this from
the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash).

-### Override on Windows
+### Dynamic Override on Windows

-Overriding on Windows is robust and has the
-particular advantage to be able to redirect all malloc/free calls that go through
-the (dynamic) C runtime allocator, including those from other DLL's or libraries.
+We use a separate redirection DLL to override malloc with mimalloc on Windows
+such that we redirect all malloc/free calls that go through the (dynamic) C runtime allocator,
+including those from other DLLs or libraries. As it intercepts all allocation calls on a low level,
+it can be used on large programs that include other 3rd party components.
+There are four requirements to make the overriding work well:

-The overriding on Windows requires that you link your program explicitly with
-the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch).
-Also, the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) must be put
-in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency).
-The redirection DLL ensures that all calls to the C runtime malloc API get redirected to
-mimalloc (in `mimalloc-override.dll`).
+1.
Use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch).
-To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some
-call to the mimalloc API in the `main` function, like `mi_version()`
-(or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project
-for an example on how to use this. For best performance on Windows with C++, it
-is also recommended to also override the `new`/`delete` operations (by including
-[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) a single(!) source file in your project).
+2. Link your program explicitly with the `mimalloc.dll.lib` export library for the `mimalloc.dll`
+   (which must be compiled with `-DMI_OVERRIDE=ON`; this is the default).
+   To ensure the `mimalloc.dll` is actually loaded at run-time it is easiest
+   to insert some call to the mimalloc API in the `main` function, like `mi_version()`
+   (or use the `/include:mi_version` switch on the linker command, or
+   similarly, `#pragma comment(linker, "/include:mi_version")` in some source file).
+   See the `mimalloc-test-override` project for an example on how to use this.
+
+3. The `mimalloc-redirect.dll` must be put in the same directory as the main
+   `mimalloc.dll` at runtime (as it is a dependency of that DLL).
+   The redirection DLL ensures that all calls to the C runtime malloc API get
+   redirected to mimalloc functions (which reside in `mimalloc.dll`).
+
+4. Ensure the `mimalloc.dll` comes as early as possible in the import
+   list of the final executable (so it can intercept all potential allocations).
+   You can use `minject -l <myprogram.exe>` to check this if needed.
+
+For best performance on Windows with C++, it is also recommended to override
+the `new`/`delete` operations (by including [`mimalloc-new-delete.h`](include/mimalloc-new-delete.h)
+in a single(!) source file in your project).

The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic
-overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected.
+overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully
+redirected.

-(Note: in principle, it is possible to even patch existing executables without any recompilation
-if they are linked with the dynamic C runtime (`ucrtbase.dll`) -- just put the `mimalloc-override.dll`
-into the import table (and put `mimalloc-redirect.dll` in the same folder)
-Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388)).
+For platforms other than x64, you may need a specific [redirection dll](bin).
+Furthermore, we cannot always re-link an executable or ensure `mimalloc.dll` comes
+first in the import table. In such cases the [`minject`](bin) tool can be used
+to patch the executable's import tables.

## Static override

On Unix-like systems, you can also statically link with _mimalloc_ to override the standard
malloc interface. The recommended way is to link the final program with the
-_mimalloc_ single object file (`mimalloc-override.o`). We use
+_mimalloc_ single object file (`mimalloc.o`). We use
an object file instead of a library file as linkers give preference to that over archives to resolve
symbols. To ensure that the standard malloc interface resolves to the _mimalloc_ library, link it as the first
object file. For example:
+
```
-> gcc -o myprogram mimalloc-override.o myfile1.c ...
+> gcc -o myprogram mimalloc.o myfile1.c ...
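+> # optional: run with MIMALLOC_SHOW_STATS to verify that the static override is in effect
+> # (reuses the example `myprogram` built above; mimalloc prints its statistics on exit)
+> MIMALLOC_SHOW_STATS=1 ./myprogram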
```

Another way to override statically that works on all platforms, is to link statically to mimalloc (as shown
in the introduction) and include a header file in each source file that re-defines `malloc` etc. to `mi_malloc`.
-This is provided by [`mimalloc-override.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-override.h). This only works reliably though if all sources are
+This is provided by [`mimalloc-override.h`](include/mimalloc-override.h). This only works
+reliably, though, if all sources are
under your control or otherwise mixing of pointers from different heaps may occur!
+
+# Tools
+
+Generally, we recommend using the standard allocator with memory tracking tools, but mimalloc
+can also be built to support the [address sanitizer][asan] or the excellent [Valgrind] tool.
+Moreover, it can be built to support Windows event tracing ([ETW]).
+This has a small performance overhead but does allow detecting memory leaks and byte-precise
+buffer overflows directly on final executables. See also the `test/test-wrong.c` file to test with various tools.
+
+## Valgrind
+
+To build with [valgrind] support, use the `MI_TRACK_VALGRIND=ON` cmake option:
+
+```
+> cmake ../.. -DMI_TRACK_VALGRIND=ON
+```
+
+This can also be combined with secure mode or debug mode.
+You can then run your programs directly under valgrind:
+
+```
+> valgrind <myprogram>
+```
+
+If you rely on overriding `malloc`/`free` by mimalloc (instead of using the `mi_malloc`/`mi_free` API directly),
+you also need to tell `valgrind` to not intercept those calls itself, and use:
+
+```
+> MIMALLOC_SHOW_STATS=1 valgrind --soname-synonyms=somalloc=*mimalloc* -- <myprogram>
+```
+
+By setting the `MIMALLOC_SHOW_STATS` environment variable you can check that mimalloc is indeed
+used and not the standard allocator. Even though the [Valgrind option][valgrind-soname]
+is called `--soname-synonyms`, this also works when overriding with a static library or object file.
+To dynamically override malloc with mimalloc using `LD_PRELOAD` together with `valgrind`, use:
+
+```
+> valgrind --trace-children=yes --soname-synonyms=somalloc=*mimalloc* /usr/bin/env LD_PRELOAD=/usr/lib/libmimalloc.so -- <myprogram>
+```
+
+See also the `test/test-wrong.c` file to test with `valgrind`.
+
+Valgrind support is in its initial development -- please report any issues.
+
+[Valgrind]: https://valgrind.org/
+[valgrind-soname]: https://valgrind.org/docs/manual/manual-core.html#opt.soname-synonyms
+
+## ASAN
+
+To build with the address sanitizer, use the `-DMI_TRACK_ASAN=ON` cmake option:
+
+```
+> cmake ../.. -DMI_TRACK_ASAN=ON
+```
+
+This can also be combined with secure mode or debug mode.
+You can then run your programs as:
+
+```
+> ASAN_OPTIONS=verbosity=1 <myprogram>
+```
+
+When you link a program with an address sanitizer build of mimalloc, you should
+generally compile that program too with the address sanitizer enabled.
+For example, assuming you build mimalloc in `out/debug`:
+
+```
+clang -g -o test-wrong -Iinclude test/test-wrong.c out/debug/libmimalloc-asan-debug.a -lpthread -fsanitize=address -fsanitize-recover=address
+```
+
+Since the address sanitizer redirects the standard allocation functions, on some platforms (macOS for example)
+it is required to compile mimalloc with `-DMI_OVERRIDE=OFF`.
+Address sanitizer support is in its initial development -- please report any issues.
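+
+For a quick smoke test of such a build, a minimal example (a sketch only; the `test/test-wrong.c`
+file is the more complete test program) with a deliberate one-byte overflow that should be reported
+when compiled like `test-wrong` above:
+
+```c
+#include <mimalloc.h>
+#include <string.h>
+
+int main(void) {
+  char* p = (char*)mi_malloc(16);   // allocate 16 bytes through mimalloc
+  if (p == NULL) return 1;
+  memset(p, 'x', 17);               // write one byte past the end -- should be reported
+  mi_free(p);
+  return 0;
+}
+```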
+
+[asan]: https://github.com/google/sanitizers/wiki/AddressSanitizer
+
+## ETW
+
+Event tracing for Windows ([ETW]) provides a high performance way to capture all allocations through
+mimalloc and analyze them later. To build with ETW support, use the `-DMI_TRACK_ETW=ON` cmake option.
+
+You can then capture an allocation trace using the Windows performance recorder (WPR), using the
+`src/prim/windows/etw-mimalloc.wprp` profile. In an admin prompt, you can use:
+```
+> wpr -start src\prim\windows\etw-mimalloc.wprp -filemode
+> <run your program>
+> wpr -stop <filename>.etl
+```
+and then open `<filename>.etl` in the Windows Performance Analyzer (WPA), or
+use a tool like [TraceControl] that is specialized for analyzing mimalloc traces.
+
+[ETW]: https://learn.microsoft.com/en-us/windows-hardware/test/wpt/event-tracing-for-windows
+[TraceControl]: https://github.com/xinglonghe/TraceControl
+
+
# Performance

Last update: 2021-01-30
@@ -735,6 +910,16 @@ provided by the bot. You will only need to do this once across all repos using o

# Older Release Notes

+* 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including
+  M1), improved performance for v2 for large objects, Python integration improvements, more standard
+  installation directories, various small fixes.
+* 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix
+  thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes.
+* 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, initial M1 support (still experimental).
+* 2021-01-31, `v2.0.0`: beta release 2.0: new slice algorithm for managing internal mimalloc pages.
+* 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics,
+  improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes.
+
* 2020-09-24, `v1.6.7`: stable release 1.6: using standard C atomics, passing tsan testing, improved
  handling of failing to commit on Windows, add [`mi_process_info`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc.h#L156) api call.
* 2020-08-06, `v1.6.4`: stable release 1.6: improved error recovery in low-memory situations,
@@ -756,6 +941,7 @@ provided by the bot. You will only need to do this once across all repos using o
  more eager concurrent free, addition of STL allocator, fixed potential memory leak.
* 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode.
+* 2019-12-22, `v1.2.2`: stable release 1.2: minor updates.
* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows.
* 2019-10-07, `v1.1.0`: stable release 1.1.
diff --git a/3rd/mimalloc-3.2.6/src/alloc-aligned.c b/3rd/mimalloc-3.2.6/src/alloc-aligned.c
new file mode 100644
index 00000000..0651e2c3
--- /dev/null
+++ b/3rd/mimalloc-3.2.6/src/alloc-aligned.c
@@ -0,0 +1,439 @@
+/* ----------------------------------------------------------------------------
+Copyright (c) 2018-2025, Microsoft Research, Daan Leijen
+This is free software; you can redistribute it and/or modify it under the
+terms of the MIT license.
A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" // _mi_theap_default + +#include // memset + +// ------------------------------------------------------ +// Aligned Allocation +// ------------------------------------------------------ + +static bool mi_malloc_is_naturally_aligned( size_t size, size_t alignment ) { + // certain blocks are always allocated at a certain natural alignment. + // (see also `arena.c:mi_arenas_page_alloc_fresh`). + mi_assert_internal(_mi_is_power_of_two(alignment) && (alignment > 0)); + if (alignment > size) return false; + const size_t bsize = mi_good_size(size); + const bool ok = (bsize <= MI_PAGE_MAX_START_BLOCK_ALIGN2 && _mi_is_power_of_two(bsize)) || // power-of-two under N + (alignment==MI_PAGE_OSPAGE_BLOCK_ALIGN2 && (bsize % MI_PAGE_OSPAGE_BLOCK_ALIGN2)==0); // or multiple of N + if (ok) { mi_assert_internal((bsize & (alignment-1)) == 0); } // since both power of 2 and alignment <= size + return ok; +} + +#if MI_GUARDED +static mi_decl_restrict void* mi_theap_malloc_guarded_aligned(mi_theap_t* theap, size_t size, size_t alignment, bool zero) mi_attr_noexcept { + // use over allocation for guarded blocksl + #if MI_THEAP_INITASNULL + if mi_unlikely(theap==NULL) { theap = _mi_theap_empty_get(); } + #endif + mi_assert_internal(alignment > 0 && alignment < MI_PAGE_MAX_OVERALLOC_ALIGN); + const size_t oversize = size + alignment - 1; + void* base = _mi_theap_malloc_guarded(theap, oversize, zero); + void* p = _mi_align_up_ptr(base, alignment); + mi_track_align(base, p, (uint8_t*)p - (uint8_t*)base, size); + mi_assert_internal(mi_usable_size(p) >= size); + mi_assert_internal(_mi_is_aligned(p, alignment)); + return p; +} + +static void* mi_theap_malloc_zero_no_guarded(mi_theap_t* theap, size_t size, bool zero, size_t* usable) { + #if MI_THEAP_INITASNULL + if mi_unlikely(theap==NULL) { theap = _mi_theap_empty_get(); } + #endif + const size_t rate = theap->guarded_sample_rate; + // only write if `rate!=0` so we don't write to the constant `_mi_theap_empty` + if (rate != 0) { theap->guarded_sample_rate = 0; } + void* p = _mi_theap_malloc_zero(theap, size, zero, usable); + if (rate != 0) { theap->guarded_sample_rate = rate; } + return p; +} +#else +static void* mi_theap_malloc_zero_no_guarded(mi_theap_t* theap, size_t size, bool zero, size_t* usable) { + return _mi_theap_malloc_zero(theap, size, zero, usable); +} +#endif + +// Fallback aligned allocation that over-allocates -- split out for better codegen +static mi_decl_noinline void* mi_theap_malloc_zero_aligned_at_overalloc(mi_theap_t* const theap, const size_t size, const size_t alignment, const size_t offset, const bool zero, size_t* usable) mi_attr_noexcept +{ + mi_assert_internal(size <= (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)); + mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment)); + + void* p; + size_t oversize; + if mi_unlikely(alignment > MI_PAGE_MAX_OVERALLOC_ALIGN) { + // use OS allocation for large alignments and allocate inside a singleton page (not in an arena) + // This can support alignments >= MI_PAGE_ALIGN by ensuring the object can be aligned + // in the first (and single) page such that the page info is `MI_PAGE_ALIGN` bytes before it (and can be found in the _mi_page_map). 
+ if mi_unlikely(offset != 0) { + // todo: cannot support offset alignment for very large alignments yet + #if MI_DEBUG > 0 + _mi_error_message(EOVERFLOW, "aligned allocation with a large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset); + #endif + return NULL; + } + oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size); + // note: no guarded as alignment > 0 + p = _mi_theap_malloc_zero_ex(theap, oversize, zero, alignment, usable); // the page block size should be large enough to align in the single huge page block + if (p == NULL) return NULL; + } + else { + // otherwise over-allocate + oversize = (size < MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : size) + alignment - 1; // adjust for size <= 16; with size 0 and aligment 64k, we would allocate a 64k block and pointing just beyond that. + p = mi_theap_malloc_zero_no_guarded(theap, oversize, zero, usable); + if (p == NULL) return NULL; + } + + // .. and align within the allocation + const uintptr_t align_mask = alignment - 1; // for any x, `(x & align_mask) == (x % alignment)` + const uintptr_t poffset = ((uintptr_t)p + offset) & align_mask; + const uintptr_t adjust = (poffset == 0 ? 0 : alignment - poffset); + mi_assert_internal(adjust < alignment); + void* aligned_p = (void*)((uintptr_t)p + adjust); + + // note: after the above allocation, the page may be abandoned now (as it became full, see `page.c:_mi_malloc_generic`) + // and we no longer own it. We should be careful to only read constant fields in the page, + // or use safe atomic access as in `mi_page_set_has_interior_pointers`. + // (we can access the page though since the just allocated pointer keeps it alive) + mi_page_t* page = _mi_ptr_page(p); + if (aligned_p != p) { + mi_page_set_has_interior_pointers(page, true); + #if MI_GUARDED + // set tag to aligned so mi_usable_size works with guard pages + if (adjust >= sizeof(mi_block_t)) { + mi_block_t* const block = (mi_block_t*)p; + block->next = MI_BLOCK_TAG_ALIGNED; + } + #endif + _mi_padding_shrink(page, (mi_block_t*)p, adjust + size); + } + // todo: expand padding if overallocated ? 
+ + mi_assert_internal(mi_page_usable_block_size(page) >= adjust + size); + mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); + mi_assert_internal(mi_usable_size(aligned_p)>=size); + mi_assert_internal(mi_usable_size(p) == mi_usable_size(aligned_p)+adjust); + #if MI_DEBUG > 1 + mi_page_t* const apage = _mi_ptr_page(aligned_p); + void* unalign_p = _mi_page_ptr_unalign(apage, aligned_p); + mi_assert_internal(p == unalign_p); + #endif + + // now zero the block if needed + //if (alignment > MI_PAGE_MAX_OVERALLOC_ALIGN) { + // // for the tracker, on huge aligned allocations only from the start of the large block is defined + // mi_track_mem_undefined(aligned_p, size); + // if (zero) { + // _mi_memzero_aligned(aligned_p, mi_usable_size(aligned_p)); + // } + //} + + if (p != aligned_p) { + mi_track_align(p,aligned_p,adjust,mi_usable_size(aligned_p)); + #if MI_GUARDED + mi_track_mem_defined(p, sizeof(mi_block_t)); + #endif + } + return aligned_p; +} + +// Generic primitive aligned allocation -- split out for better codegen +static mi_decl_noinline void* mi_theap_malloc_zero_aligned_at_generic(mi_theap_t* const theap, const size_t size, const size_t alignment, const size_t offset, const bool zero, size_t* usable) mi_attr_noexcept +{ + mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment)); + // we don't allocate more than MI_MAX_ALLOC_SIZE (see ) + if mi_unlikely(size > (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)) { + #if MI_DEBUG > 0 + _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment); + #endif + return NULL; + } + + // use regular allocation if it is guaranteed to fit the alignment constraints. + // this is important to try as the fast path in `mi_theap_malloc_zero_aligned` only works when there exist + // a page with the right block size, and if we always use the over-alloc fallback that would never happen. + if (offset == 0 && mi_malloc_is_naturally_aligned(size,alignment)) { + void* p = mi_theap_malloc_zero_no_guarded(theap, size, zero, usable); + mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0); + const bool is_aligned_or_null = (((uintptr_t)p) & (alignment-1))==0; + if mi_likely(is_aligned_or_null) { + return p; + } + else { + // this should never happen if the `mi_malloc_is_naturally_aligned` check is correct.. + mi_assert(false); + mi_free(p); + } + } + + // fall back to over-allocation + return mi_theap_malloc_zero_aligned_at_overalloc(theap,size,alignment,offset,zero,usable); +} + + +// Primitive aligned allocation +static inline void* mi_theap_malloc_zero_aligned_at(mi_theap_t* const theap, const size_t size, const size_t alignment, const size_t offset, const bool zero, size_t* usable) mi_attr_noexcept +{ + // note: we don't require `size > offset`, we just guarantee that the address at offset is aligned regardless of the allocated size. 
+ if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) { // require power-of-two (see ) + #if MI_DEBUG > 0 + _mi_error_message(EOVERFLOW, "aligned allocation requires the alignment to be a power-of-two (size %zu, alignment %zu)\n", size, alignment); + #endif + return NULL; + } + + #if MI_GUARDED + #if MI_THEAP_INITASNULL + if mi_likely(theap!=NULL) + #endif + if (offset==0 && alignment < MI_PAGE_MAX_OVERALLOC_ALIGN && mi_theap_malloc_use_guarded(theap,size)) { + return mi_theap_malloc_guarded_aligned(theap, size, alignment, zero); + } + #endif + + // try first if there happens to be a small block available with just the right alignment + // since most small power-of-2 blocks (under MI_PAGE_MAX_BLOCK_START_ALIGN2) are already + // naturally aligned this can be often the case. + #if MI_THEAP_INITASNULL + if mi_likely(theap!=NULL) + #endif + { + if mi_likely(size <= MI_SMALL_SIZE_MAX && alignment <= size) { + const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` + const size_t padsize = size + MI_PADDING_SIZE; + mi_page_t* page = _mi_theap_get_free_small_page(theap, padsize); + if mi_likely(page->free != NULL) { + const bool is_aligned = (((uintptr_t)page->free + offset) & align_mask)==0; + if mi_likely(is_aligned) + { + if (usable!=NULL) { *usable = mi_page_usable_block_size(page); } + void* p = (zero ? _mi_page_malloc_zeroed(theap, page, padsize) : _mi_page_malloc(theap, page, padsize)); // call specific page malloc for better codegen + mi_assert_internal(p != NULL); + mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); + mi_track_malloc(p, size, zero); + return p; + } + } + } + } + + // fallback to generic aligned allocation + return mi_theap_malloc_zero_aligned_at_generic(theap, size, alignment, offset, zero, usable); +} + + +// ------------------------------------------------------ +// Internal mi_theap_malloc_aligned / mi_malloc_aligned +// ------------------------------------------------------ + +static mi_decl_restrict void* mi_theap_malloc_aligned_at(mi_theap_t* theap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_theap_malloc_zero_aligned_at(theap, size, alignment, offset, false, NULL); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_theap_malloc_aligned(mi_theap_t* theap, size_t size, size_t alignment) mi_attr_noexcept { + return mi_theap_malloc_aligned_at(theap, size, alignment, 0); +} + +static mi_decl_restrict void* mi_theap_zalloc_aligned_at(mi_theap_t* theap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_theap_malloc_zero_aligned_at(theap, size, alignment, offset, true, NULL); +} + +static mi_decl_restrict void* mi_theap_zalloc_aligned(mi_theap_t* theap, size_t size, size_t alignment) mi_attr_noexcept { + return mi_theap_zalloc_aligned_at(theap, size, alignment, 0); +} + +static mi_decl_restrict void* mi_theap_calloc_aligned_at(mi_theap_t* theap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + size_t total; + if (mi_count_size_overflow(count, size, &total)) return NULL; + return mi_theap_zalloc_aligned_at(theap, total, alignment, offset); +} + +static mi_decl_restrict void* mi_theap_calloc_aligned(mi_theap_t* theap, size_t count, size_t size, size_t alignment) mi_attr_noexcept { + return mi_theap_calloc_aligned_at(theap, count, size, alignment, 0); +} + + +// ------------------------------------------------------ +// Aligned Allocation +// ------------------------------------------------------ + +mi_decl_nodiscard 
mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_theap_malloc_aligned_at(_mi_theap_default(), size, alignment, offset); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { + return mi_theap_malloc_aligned(_mi_theap_default(), size, alignment); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_umalloc_aligned(size_t size, size_t alignment, size_t* block_size) mi_attr_noexcept { + return mi_theap_malloc_zero_aligned_at(_mi_theap_default(), size, alignment, 0, false, block_size); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_theap_zalloc_aligned_at(_mi_theap_default(), size, alignment, offset); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { + return mi_theap_zalloc_aligned(_mi_theap_default(), size, alignment); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_uzalloc_aligned(size_t size, size_t alignment, size_t* block_size) mi_attr_noexcept { + return mi_theap_malloc_zero_aligned_at(_mi_theap_default(), size, alignment, 0, true, block_size); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_theap_calloc_aligned_at(_mi_theap_default(), count, size, alignment, offset); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept { + return mi_theap_calloc_aligned(_mi_theap_default(), count, size, alignment); +} + + +mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_theap_malloc_aligned_at(_mi_heap_theap(heap), size, alignment, offset); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { + return mi_theap_malloc_aligned(_mi_heap_theap(heap), size, alignment); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_theap_zalloc_aligned_at(_mi_heap_theap(heap), size, alignment, offset); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { + return mi_theap_zalloc_aligned(_mi_heap_theap(heap), size, alignment); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_theap_calloc_aligned_at(_mi_heap_theap(heap), count, size, alignment, offset); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept { + return mi_theap_calloc_aligned(_mi_heap_theap(heap), count, size, alignment); +} + + +// ------------------------------------------------------ +// Aligned re-allocation +// ------------------------------------------------------ + +static void* mi_theap_realloc_zero_aligned_at(mi_theap_t* theap, void* p, size_t newsize, size_t alignment, size_t offset, bool zero) mi_attr_noexcept { + mi_assert(alignment > 0); + if (alignment <= sizeof(uintptr_t)) return _mi_theap_realloc_zero(theap,p,newsize,zero,NULL,NULL); + if (p == NULL) return 
mi_theap_malloc_zero_aligned_at(theap,newsize,alignment,offset,zero,NULL); + size_t size = mi_usable_size(p); + if (newsize <= size && newsize >= (size - (size / 2)) + && (((uintptr_t)p + offset) % alignment) == 0) { + return p; // reallocation still fits, is aligned and not more than 50% waste + } + else { + // note: we don't zero allocate upfront so we only zero initialize the expanded part + void* newp = mi_theap_malloc_aligned_at(theap,newsize,alignment,offset); + if (newp != NULL) { + if (zero && newsize > size) { + // also set last word in the previous allocation to zero to ensure any padding is zero-initialized + size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); + _mi_memzero((uint8_t*)newp + start, newsize - start); + } + _mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize)); + mi_free(p); // only free if successful + } + return newp; + } +} + +static void* mi_theap_realloc_zero_aligned(mi_theap_t* theap, void* p, size_t newsize, size_t alignment, bool zero) mi_attr_noexcept { + mi_assert(alignment > 0); + if (alignment <= sizeof(uintptr_t)) return _mi_theap_realloc_zero(theap,p,newsize,zero,NULL,NULL); + size_t offset = ((uintptr_t)p % alignment); // use offset of previous allocation (p can be NULL) + return mi_theap_realloc_zero_aligned_at(theap,p,newsize,alignment,offset,zero); +} + +static void* mi_theap_realloc_aligned_at(mi_theap_t* theap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_theap_realloc_zero_aligned_at(theap,p,newsize,alignment,offset,false); +} + +static void* mi_theap_realloc_aligned(mi_theap_t* theap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { + return mi_theap_realloc_zero_aligned(theap,p,newsize,alignment,false); +} + +static void* mi_theap_rezalloc_aligned_at(mi_theap_t* theap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_theap_realloc_zero_aligned_at(theap, p, newsize, alignment, offset, true); +} + +static void* mi_theap_rezalloc_aligned(mi_theap_t* theap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { + return mi_theap_realloc_zero_aligned(theap, p, newsize, alignment, true); +} + +static void* mi_theap_recalloc_aligned_at(mi_theap_t* theap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + size_t total; + if (mi_count_size_overflow(newcount, size, &total)) return NULL; + return mi_theap_rezalloc_aligned_at(theap, p, total, alignment, offset); +} + +static void* mi_theap_recalloc_aligned(mi_theap_t* theap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { + size_t total; + if (mi_count_size_overflow(newcount, size, &total)) return NULL; + return mi_theap_rezalloc_aligned(theap, p, total, alignment); +} + + +mi_decl_nodiscard void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_theap_realloc_aligned_at(_mi_theap_default(), p, newsize, alignment, offset); +} + +mi_decl_nodiscard void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { + return mi_theap_realloc_aligned(_mi_theap_default(), p, newsize, alignment); +} + +mi_decl_nodiscard void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_theap_rezalloc_aligned_at(_mi_theap_default(), p, newsize, alignment, offset); +} + +mi_decl_nodiscard void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { + return 
mi_theap_rezalloc_aligned(_mi_theap_default(), p, newsize, alignment); +} + +mi_decl_nodiscard void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_theap_recalloc_aligned_at(_mi_theap_default(), p, newcount, size, alignment, offset); +} + +mi_decl_nodiscard void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { + return mi_theap_recalloc_aligned(_mi_theap_default(), p, newcount, size, alignment); +} + + +mi_decl_nodiscard void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_theap_realloc_aligned_at(_mi_heap_theap(heap), p, newsize, alignment, offset); +} + +mi_decl_nodiscard void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { + return mi_theap_realloc_aligned(_mi_heap_theap(heap), p, newsize, alignment); +} + +mi_decl_nodiscard void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_theap_rezalloc_aligned_at(_mi_heap_theap(heap), p, newsize, alignment, offset); +} + +mi_decl_nodiscard void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { + return mi_theap_rezalloc_aligned(_mi_heap_theap(heap), p, newsize, alignment); +} + +mi_decl_nodiscard void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_theap_recalloc_aligned_at(_mi_heap_theap(heap), p, newcount, size, alignment, offset); +} + +mi_decl_nodiscard void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { + return mi_theap_recalloc_aligned(_mi_heap_theap(heap), p, newcount, size, alignment); +} + + diff --git a/3rd/mimalloc-2.0.9/src/alloc-override.c b/3rd/mimalloc-3.2.6/src/alloc-override.c similarity index 86% rename from 3rd/mimalloc-2.0.9/src/alloc-override.c rename to 3rd/mimalloc-3.2.6/src/alloc-override.c index 84a0d19d..882e0607 100644 --- a/3rd/mimalloc-2.0.9/src/alloc-override.c +++ b/3rd/mimalloc-3.2.6/src/alloc-override.c @@ -23,7 +23,7 @@ mi_decl_externc size_t malloc_good_size(size_t size); #endif // helper definition for C override of C++ new -typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; +typedef void* mi_nothrow_t; // ------------------------------------------------------ // Override system malloc @@ -57,7 +57,7 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; // functions that are interposed (or the interposing does not work) #define MI_OSX_IS_INTERPOSED - mi_decl_externc static size_t mi_malloc_size_checked(void *p) { + mi_decl_externc size_t mi_malloc_size_checked(void *p) { if (!mi_is_in_heap_region(p)) return 0; return mi_usable_size(p); } @@ -71,22 +71,20 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; #define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun } #define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun) - __attribute__((used)) static struct mi_interpose_s _mi_interposes[] __attribute__((section("__DATA, __interpose"))) = + #define MI_INTERPOSE_DECLS(name) __attribute__((used)) static struct mi_interpose_s name[] __attribute__((section("__DATA, __interpose"))) + + MI_INTERPOSE_DECLS(_mi_interposes) = { MI_INTERPOSE_MI(malloc), MI_INTERPOSE_MI(calloc), MI_INTERPOSE_MI(realloc), 
MI_INTERPOSE_MI(strdup), - MI_INTERPOSE_MI(strndup), MI_INTERPOSE_MI(realpath), MI_INTERPOSE_MI(posix_memalign), MI_INTERPOSE_MI(reallocf), MI_INTERPOSE_MI(valloc), MI_INTERPOSE_FUN(malloc_size,mi_malloc_size_checked), MI_INTERPOSE_MI(malloc_good_size), - #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 - MI_INTERPOSE_MI(aligned_alloc), - #endif #ifdef MI_OSX_ZONE // we interpose malloc_default_zone in alloc-override-osx.c so we can use mi_free safely MI_INTERPOSE_MI(free), @@ -97,6 +95,12 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; MI_INTERPOSE_FUN(vfree,mi_cfree), #endif }; + MI_INTERPOSE_DECLS(_mi_interposes_10_7) __OSX_AVAILABLE(10.7) = { + MI_INTERPOSE_MI(strndup), + }; + MI_INTERPOSE_DECLS(_mi_interposes_10_15) __OSX_AVAILABLE(10.15) = { + MI_INTERPOSE_MI(aligned_alloc), + }; #ifdef __cplusplus extern "C" { @@ -128,11 +132,19 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; // cannot override malloc unless using a dll. // we just override new/delete which does work in a static library. #else - // On all other systems forward to our API + // On all other systems forward allocation primitives to our API mi_decl_export void* malloc(size_t size) MI_FORWARD1(mi_malloc, size) mi_decl_export void* calloc(size_t size, size_t n) MI_FORWARD2(mi_calloc, size, n) mi_decl_export void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize) mi_decl_export void free(void* p) MI_FORWARD0(mi_free, p) + // In principle we do not need to forward `strdup`/`strndup` but on some systems these do not use `malloc` internally (but a more primitive call) + // We only override if `strdup` is not a macro (as on some older libc's, see issue #885) + #if !defined(strdup) + mi_decl_export char* strdup(const char* str) MI_FORWARD1(mi_strdup, str) + #endif + #if !defined(strndup) && (!defined(__APPLE__) || (defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7)) + mi_decl_export char* strndup(const char* str, size_t n) MI_FORWARD2(mi_strndup, str, n) + #endif #endif #if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__) @@ -192,11 +204,17 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; void _ZdaPv(void* p) MI_FORWARD0(mi_free,p) // delete[] void _ZdlPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n) void _ZdaPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n) + void _ZdlPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); } void _ZdaPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); } void _ZdlPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); } void _ZdaPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); } + void _ZdlPvRKSt9nothrow_t(void* p, mi_nothrow_t tag) { MI_UNUSED(tag); mi_free(p); } // operator delete(void*, std::nothrow_t const&) + void _ZdaPvRKSt9nothrow_t(void* p, mi_nothrow_t tag) { MI_UNUSED(tag); mi_free(p); } // operator delete[](void*, std::nothrow_t const&) + void _ZdlPvSt11align_val_tRKSt9nothrow_t(void* p, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); mi_free_aligned(p,al); } // operator delete(void*, std::align_val_t, std::nothrow_t const&) + void _ZdaPvSt11align_val_tRKSt9nothrow_t(void* p, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); mi_free_aligned(p,al); } // operator delete[](void*, std::align_val_t, std::nothrow_t const&) + #if (MI_INTPTR_SIZE==8) void* _Znwm(size_t n) MI_FORWARD1(mi_new,n) // new 64-bit void* _Znam(size_t n) MI_FORWARD1(mi_new,n) // new[] 64-bit 
@@ -232,7 +250,7 @@ extern "C" { // Forward Posix/Unix calls as well void* reallocf(void* p, size_t newsize) MI_FORWARD2(mi_reallocf,p,newsize) size_t malloc_size(const void* p) MI_FORWARD1(mi_usable_size,p) - #if !defined(__ANDROID__) && !defined(__FreeBSD__) + #if !defined(__ANDROID__) && !defined(__FreeBSD__) && !defined(__DragonFly__) size_t malloc_usable_size(void *p) MI_FORWARD1(mi_usable_size,p) #else size_t malloc_usable_size(const void *p) MI_FORWARD1(mi_usable_size,p) @@ -245,11 +263,13 @@ extern "C" { int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p, alignment, size); } // `aligned_alloc` is only available when __USE_ISOC11 is defined. + // Note: it seems __USE_ISOC11 is not defined in musl (and perhaps other libc's) so we only check + // for it if using glibc. // Note: Conda has a custom glibc where `aligned_alloc` is declared `static inline` and we cannot // override it, but both _ISOC11_SOURCE and __USE_ISOC11 are undefined in Conda GCC7 or GCC9. // Fortunately, in the case where `aligned_alloc` is declared as `static inline` it // uses internally `memalign`, `posix_memalign`, or `_aligned_malloc` so we can avoid overriding it ourselves. - #if __USE_ISOC11 + #if !defined(__GLIBC__) || __USE_ISOC11 void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); } #endif #endif @@ -257,10 +277,11 @@ extern "C" { // no forwarding here due to aliasing/name mangling issues void cfree(void* p) { mi_free(p); } void* pvalloc(size_t size) { return mi_pvalloc(size); } -void* reallocarray(void* p, size_t count, size_t size) { return mi_reallocarray(p, count, size); } -int reallocarr(void* p, size_t count, size_t size) { return mi_reallocarr(p, count, size); } void* memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); } void* _aligned_malloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); } +void* reallocarray(void* p, size_t count, size_t size) { return mi_reallocarray(p, count, size); } +// some systems define reallocarr so mark it as a weak symbol (#751) +mi_decl_weak int reallocarr(void* p, size_t count, size_t size) { return mi_reallocarr(p, count, size); } #if defined(__wasi__) // forward __libc interface (see PR #667) @@ -270,8 +291,8 @@ void* _aligned_malloc(size_t alignment, size_t size) { return mi_aligned_allo void __libc_free(void* p) MI_FORWARD0(mi_free, p) void* __libc_memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); } -#elif defined(__GLIBC__) && defined(__linux__) - // forward __libc interface (needed for glibc-based Linux distributions) +#elif defined(__linux__) + // forward __libc interface (needed for glibc-based and musl-based Linux distributions) void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc,size) void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc,count,size) void* __libc_realloc(void* p, size_t size) MI_FORWARD2(mi_realloc,p,size) diff --git a/3rd/mimalloc-2.0.9/src/alloc-posix.c b/3rd/mimalloc-3.2.6/src/alloc-posix.c similarity index 96% rename from 3rd/mimalloc-2.0.9/src/alloc-posix.c rename to 3rd/mimalloc-3.2.6/src/alloc-posix.c index e6505f29..225752fd 100644 --- a/3rd/mimalloc-2.0.9/src/alloc-posix.c +++ b/3rd/mimalloc-3.2.6/src/alloc-posix.c @@ -10,7 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file // for convenience and used when overriding these functions. 
// ------------------------------------------------------------------------ #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" // ------------------------------------------------------ // Posix & Unix functions definitions @@ -56,7 +56,8 @@ int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept // Note: The spec dictates we should not modify `*p` on an error. (issue#27) // if (p == NULL) return EINVAL; - if (alignment % sizeof(void*) != 0) return EINVAL; // natural alignment + if ((alignment % sizeof(void*)) != 0) return EINVAL; // natural alignment + // it is also required that alignment is a power of 2 and > 0; this is checked in `mi_malloc_aligned` if (alignment==0 || !_mi_is_power_of_two(alignment)) return EINVAL; // not a power of 2 void* q = mi_malloc_aligned(size, alignment); if (q==NULL && size != 0) return ENOMEM; @@ -149,7 +150,7 @@ int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept { else { *buf = mi_strdup(p); if (*buf==NULL) return ENOMEM; - if (size != NULL) *size = strlen(p); + if (size != NULL) *size = _mi_strlen(p); } return 0; } diff --git a/3rd/mimalloc-3.2.6/src/alloc.c b/3rd/mimalloc-3.2.6/src/alloc.c new file mode 100644 index 00000000..77c010cc --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/alloc.c @@ -0,0 +1,846 @@ + +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#ifndef _DEFAULT_SOURCE +#define _DEFAULT_SOURCE // for realpath() on Linux +#endif + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" // _mi_prim_thread_id() + +#include // memset, strlen (for mi_strdup) +#include // malloc, abort + +#define MI_IN_ALLOC_C +#include "alloc-override.c" +#include "free.c" +#undef MI_IN_ALLOC_C + +// ------------------------------------------------------ +// Allocation +// ------------------------------------------------------ + +// Fast allocation in a page: just pop from the free list. +// Fall back to generic allocation only if the list is empty. +// Note: in release mode the (inlined) routine is about 7 instructions with a single test. +static mi_decl_forceinline void* mi_page_malloc_zero(mi_theap_t* theap, mi_page_t* page, size_t size, bool zero, size_t* usable) mi_attr_noexcept +{ + if (page->block_size != 0) { // not the empty theap + mi_assert_internal(mi_page_block_size(page) >= size); + mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); + mi_assert_internal(_mi_ptr_page(page)==page); + } + + // check the free list + mi_block_t* const block = page->free; + if mi_unlikely(block == NULL) { + return _mi_malloc_generic(theap, size, (zero ? 
1 : 0), usable); + } + mi_assert_internal(block != NULL && _mi_ptr_page(block) == page); + if (usable != NULL) { *usable = mi_page_usable_block_size(page); }; + // pop from the free list + page->free = mi_block_next(page, block); + page->used++; + mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page); + mi_assert_internal(page->block_size < MI_MAX_ALIGN_SIZE || _mi_is_aligned(block, MI_MAX_ALIGN_SIZE)); + + #if MI_DEBUG>3 + if (page->free_is_zero && size > sizeof(*block)) { + mi_assert_expensive(mi_mem_is_zero(block+1,size - sizeof(*block))); + } + #endif + + // allow use of the block internally + // note: when tracking we need to avoid ever touching the MI_PADDING since + // that is tracked by valgrind etc. as non-accessible (through the red-zone, see `mimalloc/track.h`) + const size_t bsize = mi_page_usable_block_size(page); + mi_track_mem_undefined(block, bsize); + + #if (MI_STAT>0) + if (bsize <= MI_LARGE_MAX_OBJ_SIZE) { + mi_theap_stat_increase(theap, malloc_normal, bsize); + #if (MI_STAT>1) + mi_theap_stat_counter_increase(theap, malloc_normal_count, 1); + const size_t bin = _mi_bin(bsize); + mi_theap_stat_increase(theap, malloc_bins[bin], 1); + mi_theap_stat_increase(theap, malloc_requested, size - MI_PADDING_SIZE); + #endif + } + #endif + + // zero the block? note: we need to zero the full block size (issue #63) + if mi_likely(!zero) { + #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN + if (!mi_page_is_huge(page)) { memset(block, MI_DEBUG_UNINIT, bsize); } + #endif + #if MI_SECURE + block->next = 0; // don't leak internal data + #endif + } + else { + mi_assert_internal(bsize<=2*MI_SMALL_SIZE_MAX); // allow faster zero'ing for small blocks + if (!page->free_is_zero) { + _mi_memzero_aligned_small(block, bsize); + } + else { + block->next = 0; + mi_track_mem_defined(block, bsize); + } + } + + #if MI_PADDING // && !MI_TRACK_ENABLED + mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + bsize); + ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); + #if (MI_DEBUG>=2) + mi_assert_internal(delta >= 0 && bsize >= (size - MI_PADDING_SIZE + delta)); + #endif + mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess + padding->canary = mi_ptr_encode_canary(page,block,page->keys); + padding->delta = (uint32_t)(delta); + #if MI_PADDING_CHECK + if (!mi_page_is_huge(page)) { + uint8_t* fill = (uint8_t*)padding - delta; + const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes + for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; } + } + #endif + #endif + + return block; +} + +// extra entries for improved efficiency in `alloc-aligned.c`. +extern void* _mi_page_malloc(mi_theap_t* theap, mi_page_t* page, size_t size) mi_attr_noexcept { + return mi_page_malloc_zero(theap, page, size, false, NULL); +} +extern void* _mi_page_malloc_zeroed(mi_theap_t* theap, mi_page_t* page, size_t size) mi_attr_noexcept { + if mi_likely(size<=MI_SMALL_SIZE_MAX) { + return mi_page_malloc_zero(theap, page, size, true, NULL); + } + else { + void* p = mi_page_malloc_zero(theap, page, size, false, NULL); + if mi_likely(p!=NULL) { _mi_memzero_aligned(p, size - MI_PADDING_SIZE); } // todo: zero usable size? 
+ return p; + } +} + +#if MI_GUARDED +mi_decl_restrict void* _mi_theap_malloc_guarded(mi_theap_t* theap, size_t size, bool zero) mi_attr_noexcept; +#endif + +// main allocation primitives for small and generic allocation + +// internal small size allocation +static mi_decl_forceinline mi_decl_restrict void* mi_theap_malloc_small_zero_nonnull(mi_theap_t* theap, size_t size, bool zero, size_t* usable) mi_attr_noexcept +{ + mi_assert(theap != NULL); + mi_assert(size <= MI_SMALL_SIZE_MAX); + #if MI_DEBUG + const uintptr_t tid = _mi_thread_id(); + mi_assert(theap->tld->thread_id == 0 || theap->tld->thread_id == tid); // theaps are thread local + #endif + #if (MI_PADDING || MI_GUARDED) + if mi_unlikely(size == 0) { size = sizeof(void*); } + #endif + #if MI_GUARDED + if mi_unlikely(mi_theap_malloc_use_guarded(theap,size)) { + return _mi_theap_malloc_guarded(theap, size, zero); + } + #endif + + // get page in constant time, and allocate from it + mi_page_t* page = _mi_theap_get_free_small_page(theap, size + MI_PADDING_SIZE); + void* const p = mi_page_malloc_zero(theap, page, size + MI_PADDING_SIZE, zero, usable); + mi_track_malloc(p,size,zero); + + #if MI_DEBUG>3 + if (p != NULL && zero) { + mi_assert_expensive(mi_mem_is_zero(p, size)); + } + #endif + return p; +} + +// internal generic allocation +static mi_decl_forceinline void* mi_theap_malloc_generic(mi_theap_t* theap, size_t size, bool zero, size_t huge_alignment, size_t* usable) mi_attr_noexcept +{ + #if MI_GUARDED + #if MI_THEAP_INITASNULL + if (theap!=NULL) + #endif + if (huge_alignment==0 && mi_theap_malloc_use_guarded(theap, size)) { + return _mi_theap_malloc_guarded(theap, size, zero); + } + #endif + #if !MI_THEAP_INITASNULL + mi_assert(theap!=NULL); + #endif + mi_assert(theap==NULL || theap->tld->thread_id == 0 || theap->tld->thread_id == _mi_thread_id()); // theaps are thread local + mi_assert((huge_alignment & 1)==0); + void* const p = _mi_malloc_generic(theap, size + MI_PADDING_SIZE, (zero ? 
1 : 0) | huge_alignment, usable); // note: size can overflow but it is detected in malloc_generic + mi_track_malloc(p, size, zero); + + #if MI_DEBUG>3 + if (p != NULL && zero) { + mi_assert_expensive(mi_mem_is_zero(p, size)); + } + #endif + return p; +} + +// internal small allocation +static mi_decl_forceinline mi_decl_restrict void* mi_theap_malloc_small_zero(mi_theap_t* theap, size_t size, bool zero, size_t* usable) mi_attr_noexcept { + #if MI_THEAP_INITASNULL + if (theap!=NULL) { + return mi_theap_malloc_small_zero_nonnull(theap, size, zero, usable); + } + else { + return mi_theap_malloc_generic(theap, size, zero, 0, usable); // tailcall + } + #else + return mi_theap_malloc_small_zero_nonnull(theap, size, zero, usable); + #endif +} + + +// allocate a small block +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_theap_malloc_small(mi_theap_t* theap, size_t size) mi_attr_noexcept { + return mi_theap_malloc_small_zero(theap, size, false, NULL); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept { + return mi_theap_malloc_small(_mi_theap_default(), size); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept { + return mi_theap_malloc_small_zero_nonnull(_mi_heap_theap(heap), size, false, NULL); +} + +// The main internal allocation functions +static mi_decl_forceinline void* mi_theap_malloc_zero_nonnull(mi_theap_t* theap, size_t size, bool zero, size_t huge_alignment, size_t* usable) mi_attr_noexcept { + // fast path for small objects + if mi_likely(size <= MI_SMALL_SIZE_MAX) { + mi_assert_internal(huge_alignment == 0); + return mi_theap_malloc_small_zero_nonnull(theap, size, zero, usable); + } + else { + return mi_theap_malloc_generic(theap, size, zero, huge_alignment, usable); + } +} + +extern mi_decl_forceinline void* _mi_theap_malloc_zero_ex(mi_theap_t* theap, size_t size, bool zero, size_t huge_alignment, size_t* usable) mi_attr_noexcept { + // fast path for small objects + #if MI_THEAP_INITASNULL + if mi_likely(theap!=NULL && size <= MI_SMALL_SIZE_MAX) + #else + if mi_likely(size <= MI_SMALL_SIZE_MAX) + #endif + { + mi_assert_internal(huge_alignment == 0); + return mi_theap_malloc_small_zero_nonnull(theap, size, zero, usable); + } + else { + return mi_theap_malloc_generic(theap, size, zero, huge_alignment, usable); + } +} + +inline void* _mi_theap_malloc_zero(mi_theap_t* theap, size_t size, bool zero, size_t* usable) mi_attr_noexcept { + return _mi_theap_malloc_zero_ex(theap, size, zero, 0, usable); +} + + +// Main allocation functions + +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_theap_malloc(mi_theap_t* theap, size_t size) mi_attr_noexcept { + return _mi_theap_malloc_zero(theap, size, false, NULL); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept { + return mi_theap_malloc(_mi_theap_default(), size); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { + return mi_theap_malloc_zero_nonnull(_mi_heap_theap(heap), size, false, 0, NULL); +} + + +// zero initialized small block +mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept { + return mi_theap_malloc_small_zero(_mi_theap_default(), size, true, NULL); +} + +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_theap_zalloc(mi_theap_t* theap, size_t size) mi_attr_noexcept { + return _mi_theap_malloc_zero(theap, size, true, NULL); +} + +mi_decl_nodiscard mi_decl_restrict void* 
mi_zalloc(size_t size) mi_attr_noexcept { + return _mi_theap_malloc_zero(_mi_theap_default(), size, true, NULL); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { + return mi_theap_malloc_zero_nonnull(_mi_heap_theap(heap), size, true, 0, NULL); +} + +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_theap_calloc(mi_theap_t* theap, size_t count, size_t size) mi_attr_noexcept { + size_t total; + if (mi_count_size_overflow(count,size,&total)) return NULL; + return mi_theap_zalloc(theap,total); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { + return mi_theap_calloc(_mi_theap_default(),count,size); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { + size_t total; + if (mi_count_size_overflow(count, size, &total)) return NULL; + return mi_heap_zalloc(heap, total); +} + +// Return usable size +mi_decl_nodiscard mi_decl_restrict void* mi_umalloc_small(size_t size, size_t* usable) mi_attr_noexcept { + return mi_theap_malloc_small_zero(_mi_theap_default(), size, false, usable); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_theap_umalloc(mi_theap_t* theap, size_t size, size_t* usable) mi_attr_noexcept { + return _mi_theap_malloc_zero_ex(theap, size, false, 0, usable); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_umalloc(size_t size, size_t* usable) mi_attr_noexcept { + return mi_theap_umalloc(_mi_theap_default(), size, usable); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_uzalloc(size_t size, size_t* usable) mi_attr_noexcept { + return _mi_theap_malloc_zero_ex(_mi_theap_default(), size, true, 0, usable); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_ucalloc(size_t count, size_t size, size_t* usable) mi_attr_noexcept { + size_t total; + if (mi_count_size_overflow(count,size,&total)) return NULL; + return mi_uzalloc(total, usable); +} + +// Uninitialized `calloc` +static mi_decl_restrict void* mi_theap_mallocn(mi_theap_t* theap, size_t count, size_t size) mi_attr_noexcept { + size_t total; + if (mi_count_size_overflow(count, size, &total)) return NULL; + return mi_theap_malloc(theap, total); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept { + return mi_theap_mallocn(_mi_theap_default(),count,size); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { + size_t total; + if (mi_count_size_overflow(count, size, &total)) return NULL; + return mi_heap_malloc(heap, total); +} + + +// Expand (or shrink) in place (or fail) +void* mi_expand(void* p, size_t newsize) mi_attr_noexcept { + #if MI_PADDING + // we do not shrink/expand with padding enabled + MI_UNUSED(p); MI_UNUSED(newsize); + return NULL; + #else + if (p == NULL) return NULL; + const mi_page_t* const page = mi_validate_ptr_page(p,"mi_expand"); + const size_t size = _mi_usable_size(p,page); + if (newsize > size) return NULL; + return p; // it fits + #endif +} + +void* _mi_theap_realloc_zero(mi_theap_t* theap, void* p, size_t newsize, bool zero, size_t* usable_pre, size_t* usable_post) mi_attr_noexcept { + // if p == NULL then behave as malloc. + // else if size == 0 then reallocate to a zero-sized block (and don't return NULL, just as mi_malloc(0)). + // (this means that returning NULL always indicates an error, and `p` will not have been freed in that case.) 
+ const mi_page_t* page; + size_t size; + if (p==NULL) { + page = NULL; + size = 0; + if (usable_pre!=NULL) { *usable_pre = 0; } + } + else { + page = mi_validate_ptr_page(p,"mi_realloc"); + size = _mi_usable_size(p,page); + if (usable_pre!=NULL) { *usable_pre = mi_page_usable_block_size(page); } + } + if mi_unlikely(newsize<=size && newsize>=(size/2) && newsize>0 // note: newsize must be > 0 or otherwise we return NULL for realloc(NULL,0) + && mi_page_heap(page)==theap->heap) // and within the same heap + { + mi_assert_internal(p!=NULL); + // todo: do not track as the usable size is still the same in the free; adjust potential padding? + // mi_track_resize(p,size,newsize) + // if (newsize < size) { mi_track_mem_noaccess((uint8_t*)p + newsize, size - newsize); } + if (usable_post!=NULL) { *usable_post = mi_page_usable_block_size(page); } + return p; // reallocation still fits and not more than 50% waste + } + void* newp = mi_theap_umalloc(theap,newsize,usable_post); + if mi_likely(newp != NULL) { + if (zero && newsize > size) { + // also set last word in the previous allocation to zero to ensure any padding is zero-initialized + const size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); + _mi_memzero((uint8_t*)newp + start, newsize - start); + } + else if (newsize == 0) { + ((uint8_t*)newp)[0] = 0; // work around for applications that expect zero-reallocation to be zero initialized (issue #725) + } + if mi_likely(p != NULL) { + const size_t copysize = (newsize > size ? size : newsize); + mi_track_mem_defined(p,copysize); // _mi_useable_size may be too large for byte precise memory tracking.. + _mi_memcpy(newp, p, copysize); + mi_free(p); // only free the original pointer if successful // todo: optimize since page is known? + } + } + return newp; +} + +mi_decl_nodiscard void* mi_theap_realloc(mi_theap_t* theap, void* p, size_t newsize) mi_attr_noexcept { + return _mi_theap_realloc_zero(theap, p, newsize, false, NULL, NULL); +} + +static void* mi_theap_reallocn(mi_theap_t* theap, void* p, size_t count, size_t size) mi_attr_noexcept { + size_t total; + if (mi_count_size_overflow(count, size, &total)) return NULL; + return mi_theap_realloc(theap, p, total); +} + + +// Reallocate but free `p` on errors +static void* mi_theap_reallocf(mi_theap_t* theap, void* p, size_t newsize) mi_attr_noexcept { + void* newp = mi_theap_realloc(theap, p, newsize); + if (newp==NULL && p!=NULL) mi_free(p); + return newp; +} + +static void* mi_theap_rezalloc(mi_theap_t* theap, void* p, size_t newsize) mi_attr_noexcept { + return _mi_theap_realloc_zero(theap, p, newsize, true, NULL, NULL); +} + +static void* mi_theap_recalloc(mi_theap_t* theap, void* p, size_t count, size_t size) mi_attr_noexcept { + size_t total; + if (mi_count_size_overflow(count, size, &total)) return NULL; + return mi_theap_rezalloc(theap, p, total); +} + + +mi_decl_nodiscard void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept { + return mi_theap_realloc(_mi_theap_default(),p,newsize); +} + +mi_decl_nodiscard void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept { + return mi_theap_reallocn(_mi_theap_default(),p,count,size); +} + +mi_decl_nodiscard void* mi_urealloc(void* p, size_t newsize, size_t* usable_pre, size_t* usable_post) mi_attr_noexcept { + return _mi_theap_realloc_zero(_mi_theap_default(),p,newsize, false, usable_pre, usable_post); +} + +// Reallocate but free `p` on errors +mi_decl_nodiscard void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept { + return 
mi_theap_reallocf(_mi_theap_default(),p,newsize); +} + +mi_decl_nodiscard void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept { + return mi_theap_rezalloc(_mi_theap_default(), p, newsize); +} + +mi_decl_nodiscard void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { + return mi_theap_recalloc(_mi_theap_default(), p, count, size); +} + + +mi_decl_nodiscard void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { + return mi_theap_realloc(_mi_heap_theap(heap), p, newsize); +} + +mi_decl_nodiscard void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { + return mi_theap_reallocn(_mi_heap_theap(heap), p, count, size); +} + +// Reallocate but free `p` on errors +mi_decl_nodiscard void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { + return mi_theap_reallocf(_mi_heap_theap(heap), p, newsize); +} + +mi_decl_nodiscard void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { + return mi_theap_rezalloc(_mi_heap_theap(heap), p, newsize); +} + +mi_decl_nodiscard void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { + return mi_theap_recalloc(_mi_heap_theap(heap), p, count, size); +} + + + +// ------------------------------------------------------ +// strdup, strndup, and realpath +// ------------------------------------------------------ + +// `strdup` using mi_malloc +mi_decl_nodiscard static mi_decl_restrict char* mi_theap_strdup(mi_theap_t* theap, const char* s) mi_attr_noexcept { + if (s == NULL) return NULL; + size_t len = _mi_strlen(s); + char* t = (char*)mi_theap_malloc(theap,len+1); + if (t == NULL) return NULL; + _mi_memcpy(t, s, len); + t[len] = 0; + return t; +} + +mi_decl_nodiscard mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept { + return mi_theap_strdup(_mi_theap_default(), s); +} + +mi_decl_nodiscard mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept { + return mi_theap_strdup(_mi_heap_theap(heap), s); +} + +// `strndup` using mi_malloc +mi_decl_nodiscard static mi_decl_restrict char* mi_theap_strndup(mi_theap_t* theap, const char* s, size_t n) mi_attr_noexcept { + if (s == NULL) return NULL; + const size_t len = _mi_strnlen(s,n); // len <= n + char* t = (char*)mi_theap_malloc(theap, len+1); + if (t == NULL) return NULL; + _mi_memcpy(t, s, len); + t[len] = 0; + return t; +} + +mi_decl_nodiscard mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { + return mi_theap_strndup(_mi_theap_default(),s,n); +} + +mi_decl_nodiscard mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept { + return mi_theap_strndup(_mi_heap_theap(heap), s, n); +} + +#ifndef __wasi__ +// `realpath` using mi_malloc +#ifdef _WIN32 +#ifndef PATH_MAX +#define PATH_MAX MAX_PATH +#endif + +mi_decl_nodiscard static mi_decl_restrict char* mi_theap_realpath(mi_theap_t* theap, const char* fname, char* resolved_name) mi_attr_noexcept { + // todo: use GetFullPathNameW to allow longer file names + char buf[PATH_MAX]; + DWORD res = GetFullPathNameA(fname, PATH_MAX, (resolved_name == NULL ? 
buf : resolved_name), NULL); + if (res == 0) { + errno = GetLastError(); return NULL; + } + else if (res > PATH_MAX) { + errno = EINVAL; return NULL; + } + else if (resolved_name != NULL) { + return resolved_name; + } + else { + return mi_theap_strndup(theap, buf, PATH_MAX); + } +} +#else +char* mi_theap_realpath(mi_theap_t* theap, const char* fname, char* resolved_name) mi_attr_noexcept { + if (resolved_name != NULL) { + return realpath(fname,resolved_name); + } + else { + char* rname = realpath(fname, NULL); + if (rname == NULL) return NULL; + char* result = mi_theap_strdup(theap, rname); + mi_cfree(rname); // use checked free (which may be redirected to our free but that's ok) + // note: with ASAN realpath is intercepted and mi_cfree may leak the returned pointer :-( + return result; + } +} +#endif + +mi_decl_nodiscard mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept { + return mi_theap_realpath(_mi_theap_default(),fname,resolved_name); +} + +mi_decl_nodiscard mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { + return mi_theap_realpath(_mi_heap_theap(heap), fname, resolved_name); +} +#endif + +/*------------------------------------------------------- +C++ new and new_aligned +The standard requires calling into `get_new_handler` and +throwing the bad_alloc exception on failure. If we compile +with a C++ compiler we can implement this precisely. If we +use a C compiler we cannot throw a `bad_alloc` exception +but we call `exit` instead (i.e. not returning). +-------------------------------------------------------*/ + +#ifdef __cplusplus +#include +static bool mi_try_new_handler(bool nothrow) { + #if defined(_MSC_VER) || (__cplusplus >= 201103L) + std::new_handler h = std::get_new_handler(); + #else + std::new_handler h = std::set_new_handler(); + std::set_new_handler(h); + #endif + if (h==NULL) { + _mi_error_message(ENOMEM, "out of memory in 'new'"); + #if defined(_CPPUNWIND) || defined(__cpp_exceptions) // exceptions are not always enabled + if (!nothrow) { + throw std::bad_alloc(); + } + #else + MI_UNUSED(nothrow); + #endif + return false; + } + else { + h(); + return true; + } +} +#else +typedef void (*std_new_handler_t)(void); + +#if (defined(__GNUC__) || (defined(__clang__) && !defined(_MSC_VER))) // exclude clang-cl, see issue #631 +std_new_handler_t __attribute__((weak)) _ZSt15get_new_handlerv(void) { + return NULL; +} +static std_new_handler_t mi_get_new_handler(void) { + return _ZSt15get_new_handlerv(); +} +#else +// note: on windows we could dynamically link to `?get_new_handler@std@@YAP6AXXZXZ`. 
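As a quick caller-side illustration of the two failure modes described above (hypothetical usage of the public `mi_new` family defined just below; not part of the vendored sources):

```
/* hypothetical caller code -- contrasting mi_new and mi_new_nothrow */
#include <mimalloc.h>

static void example(void) {
  void* required = mi_new(4096);         // keeps invoking the installed new-handler;
                                         // on exhaustion it throws std::bad_alloc
                                         // (C++ with exceptions) or aborts (plain C build)
  void* optional = mi_new_nothrow(4096); // same retry loop, but returns NULL
                                         // once the handler gives up
  mi_free(optional);                     // mi_free(NULL) is a no-op
  mi_free(required);
}
```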
+static std_new_handler_t mi_get_new_handler() { + return NULL; +} +#endif + +static bool mi_try_new_handler(bool nothrow) { + std_new_handler_t h = mi_get_new_handler(); + if (h==NULL) { + _mi_error_message(ENOMEM, "out of memory in 'new'"); + if (!nothrow) { + abort(); // cannot throw in plain C, use abort + } + return false; + } + else { + h(); + return true; + } +} +#endif + +static mi_decl_noinline void* mi_theap_try_new(mi_theap_t* theap, size_t size, bool nothrow ) { + void* p = NULL; + while(p == NULL && mi_try_new_handler(nothrow)) { + p = mi_theap_malloc(theap,size); + } + return p; +} + +static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow) { + return mi_theap_try_new(_mi_theap_default(), size, nothrow); +} + +static mi_decl_noinline void* mi_heap_try_new(mi_heap_t* heap, size_t size, bool nothrow) { + return mi_theap_try_new(_mi_heap_theap(heap), size, nothrow); +} + + +mi_decl_nodiscard static mi_decl_restrict void* mi_theap_alloc_new(mi_theap_t* theap, size_t size) { + void* p = mi_theap_malloc(theap,size); + if mi_unlikely(p == NULL) return mi_theap_try_new(theap, size, false); + return p; +} + +mi_decl_nodiscard mi_decl_restrict void* mi_new(size_t size) { + return mi_theap_alloc_new(_mi_theap_default(), size); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) { + void* p = mi_heap_malloc(heap, size); + if mi_unlikely(p == NULL) return mi_heap_try_new(heap, size, false); + return p; +} + + +mi_decl_nodiscard static mi_decl_restrict void* mi_theap_alloc_new_n(mi_theap_t* theap, size_t count, size_t size) { + size_t total; + if mi_unlikely(mi_count_size_overflow(count, size, &total)) { + mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc + return NULL; + } + else { + return mi_theap_alloc_new(theap,total); + } +} + +mi_decl_nodiscard mi_decl_restrict void* mi_new_n(size_t count, size_t size) { + return mi_theap_alloc_new_n(_mi_theap_default(), count, size); +} + +mi_decl_nodiscard mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) { + return mi_theap_alloc_new_n(_mi_heap_theap(heap), count, size); +} + + +mi_decl_nodiscard mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept { + void* p = mi_malloc(size); + if mi_unlikely(p == NULL) return mi_try_new(size, true); + return p; +} + +mi_decl_nodiscard mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) { + void* p; + do { + p = mi_malloc_aligned(size, alignment); + } + while(p == NULL && mi_try_new_handler(false)); + return p; +} + +mi_decl_nodiscard mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_noexcept { + void* p; + do { + p = mi_malloc_aligned(size, alignment); + } + while(p == NULL && mi_try_new_handler(true)); + return p; +} + +mi_decl_nodiscard void* mi_new_realloc(void* p, size_t newsize) { + void* q; + do { + q = mi_realloc(p, newsize); + } while (q == NULL && mi_try_new_handler(false)); + return q; +} + +mi_decl_nodiscard void* mi_new_reallocn(void* p, size_t newcount, size_t size) { + size_t total; + if mi_unlikely(mi_count_size_overflow(newcount, size, &total)) { + mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc + return NULL; + } + else { + return mi_new_realloc(p, total); + } +} + +#if MI_GUARDED +// We always allocate a guarded allocation at an offset (`mi_page_has_interior_pointers` will be true). 
+// We then set the first word of the block to `0` for regular offset aligned allocations (in `alloc-aligned.c`) +// and the first word to `~0` for guarded allocations to have a correct `mi_usable_size` + +static void* mi_block_ptr_set_guarded(mi_block_t* block, size_t obj_size) { + // TODO: we can still make padding work by moving it out of the guard page area + mi_page_t* const page = _mi_ptr_page(block); + mi_page_set_has_interior_pointers(page, true); + block->next = MI_BLOCK_TAG_GUARDED; + + // set guard page at the end of the block + const size_t block_size = mi_page_block_size(page); // must use `block_size` to match `mi_free_local` + const size_t os_page_size = _mi_os_page_size(); + mi_assert_internal(block_size >= obj_size + os_page_size + sizeof(mi_block_t)); + if (block_size < obj_size + os_page_size + sizeof(mi_block_t)) { + // should never happen + mi_free(block); + return NULL; + } + uint8_t* guard_page = (uint8_t*)block + block_size - os_page_size; + // note: the alignment of the guard page relies on blocks being os_page_size aligned which + // is ensured in `mi_arena_page_alloc_fresh`. + mi_assert_internal(_mi_is_aligned(block, os_page_size)); + mi_assert_internal(_mi_is_aligned(guard_page, os_page_size)); + if (!page->memid.is_pinned && _mi_is_aligned(guard_page, os_page_size)) { + const bool ok = _mi_os_protect(guard_page, os_page_size); + if mi_unlikely(!ok) { + _mi_warning_message("failed to set a guard page behind an object (object %p of size %zu)\n", block, block_size); + } + } + else { + _mi_warning_message("unable to set a guard page behind an object due to pinned memory (large OS pages?) (object %p of size %zu)\n", block, block_size); + } + + // align pointer just in front of the guard page + size_t offset = block_size - os_page_size - obj_size; + mi_assert_internal(offset > sizeof(mi_block_t)); + if (offset > MI_PAGE_MAX_OVERALLOC_ALIGN) { + // give up to place it right in front of the guard page if the offset is too large for unalignment + offset = MI_PAGE_MAX_OVERALLOC_ALIGN; + } + void* p = (uint8_t*)block + offset; + mi_track_align(block, p, offset, obj_size); + mi_track_mem_defined(block, sizeof(mi_block_t)); + return p; +} + +mi_decl_restrict void* _mi_theap_malloc_guarded(mi_theap_t* theap, size_t size, bool zero) mi_attr_noexcept +{ + #if defined(MI_PADDING_SIZE) + mi_assert(MI_PADDING_SIZE==0); + #endif + // allocate multiple of page size ending in a guard page + // ensure minimal alignment requirement? + const size_t os_page_size = _mi_os_page_size(); + const size_t obj_size = (mi_option_is_enabled(mi_option_guarded_precise) ? size : _mi_align_up(size, MI_MAX_ALIGN_SIZE)); + const size_t bsize = _mi_align_up(_mi_align_up(obj_size, MI_MAX_ALIGN_SIZE) + sizeof(mi_block_t), MI_MAX_ALIGN_SIZE); + const size_t req_size = _mi_align_up(bsize + os_page_size, os_page_size); + mi_block_t* const block = (mi_block_t*)_mi_malloc_generic(theap, req_size, (zero ? 
1 : 0), NULL); + if (block==NULL) return NULL; + void* const p = mi_block_ptr_set_guarded(block, obj_size); + + // stats + mi_track_malloc(p, size, zero); + if (p != NULL) { + if (!mi_theap_is_initialized(theap)) { theap = _mi_theap_default(); } + #if MI_STAT>1 + mi_theap_stat_adjust_decrease(theap, malloc_requested, req_size); + mi_theap_stat_increase(theap, malloc_requested, size); + #endif + mi_theap_stat_counter_increase(theap, malloc_guarded_count, 1); + } + #if MI_DEBUG>3 + if (p != NULL && zero) { + mi_assert_expensive(mi_mem_is_zero(p, size)); + } + #endif + return p; +} +#endif + +// ------------------------------------------------------ +// ensure explicit external inline definitions are emitted! +// ------------------------------------------------------ + +#ifdef __cplusplus +void* _mi_externs[] = { + (void*)&_mi_page_malloc, + (void*)&_mi_page_malloc_zeroed, + (void*)&_mi_theap_malloc_zero, + (void*)&_mi_theap_malloc_zero_ex, + (void*)&mi_theap_malloc, + (void*)&mi_theap_zalloc, + (void*)&mi_theap_malloc_small, + (void*)&mi_malloc, + (void*)&mi_malloc_small, + (void*)&mi_zalloc, + (void*)&mi_zalloc_small, + (void*)&mi_heap_malloc, + (void*)&mi_heap_malloc_small, + (void*)&mi_malloc_aligned + // (void*)&mi_theap_alloc_new, + // (void*)&mi_theap_alloc_new_n +}; +#endif diff --git a/3rd/mimalloc-3.2.6/src/arena-meta.c b/3rd/mimalloc-3.2.6/src/arena-meta.c new file mode 100644 index 00000000..7ff336e8 --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/arena-meta.c @@ -0,0 +1,179 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2024, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- + We have a special "mini" allocator just for allocation of meta-data like + the theap (`mi_theap_t`) or thread-local data (`mi_tld_t`). + + We reuse the bitmap of the arena's for allocation of 64b blocks inside + an arena slice (64KiB). + We always ensure that meta data is zero'd (we zero on `free`) +-----------------------------------------------------------------------------*/ + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "bitmap.h" + +/* ----------------------------------------------------------- + Meta data allocation +----------------------------------------------------------- */ + +#define MI_META_PAGE_SIZE MI_ARENA_SLICE_SIZE +#define MI_META_PAGE_ALIGN MI_ARENA_SLICE_ALIGN + +// large enough such that META_MAX_SIZE > 4k (even on 32-bit) +#define MI_META_BLOCK_SIZE (1 << (16 - MI_BCHUNK_BITS_SHIFT)) // 128 on 64-bit +#define MI_META_BLOCK_ALIGN MI_META_BLOCK_SIZE +#define MI_META_BLOCKS_PER_PAGE (MI_META_PAGE_SIZE / MI_META_BLOCK_SIZE) // 512 +#define MI_META_MAX_SIZE (MI_BCHUNK_SIZE * MI_META_BLOCK_SIZE) + +#if MI_META_MAX_SIZE <= 4096 +#error "max meta object size should be at least 4KiB" +#endif + +typedef struct mi_meta_page_s { + _Atomic(struct mi_meta_page_s*) next; // a linked list of meta-data pages (never released) + mi_memid_t memid; // provenance of the meta-page memory itself + mi_bbitmap_t blocks_free; // a small bitmap with 1 bit per block. 
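  // Worked example (editorial note, assuming 64-bit as per the constants above):
  // with MI_META_BLOCK_SIZE = 128 bytes, one 64 KiB meta page (a single arena
  // slice) holds 64*1024/128 = 512 blocks = MI_META_BLOCKS_PER_PAGE, so
  // `blocks_free` needs exactly one bit per 128-byte block; a single meta
  // allocation may span several contiguous blocks, up to MI_META_MAX_SIZE
  // (checked above to exceed 4 KiB).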
+} mi_meta_page_t; + +static mi_decl_cache_align _Atomic(mi_meta_page_t*) mi_meta_pages = MI_ATOMIC_VAR_INIT(NULL); + + +#if MI_DEBUG > 1 +static mi_meta_page_t* mi_meta_page_of_ptr(void* p, size_t* block_idx) { + mi_meta_page_t* mpage = (mi_meta_page_t*)((uint8_t*)mi_align_down_ptr(p,MI_META_PAGE_ALIGN) + _mi_os_secure_guard_page_size()); + if (block_idx != NULL) { + *block_idx = ((uint8_t*)p - (uint8_t*)mpage) / MI_META_BLOCK_SIZE; + } + return mpage; +} +#endif + +static mi_meta_page_t* mi_meta_page_next( mi_meta_page_t* mpage ) { + return mi_atomic_load_ptr_acquire(mi_meta_page_t, &mpage->next); +} + +static void* mi_meta_block_start( mi_meta_page_t* mpage, size_t block_idx ) { + mi_assert_internal(_mi_is_aligned((uint8_t*)mpage - _mi_os_secure_guard_page_size(), MI_META_PAGE_ALIGN)); + mi_assert_internal(block_idx < MI_META_BLOCKS_PER_PAGE); + void* p = ((uint8_t*)mpage - _mi_os_secure_guard_page_size() + (block_idx * MI_META_BLOCK_SIZE)); + mi_assert_internal(mpage == mi_meta_page_of_ptr(p,NULL)); + return p; +} + +// allocate a fresh meta page and add it to the global list. +static mi_meta_page_t* mi_meta_page_zalloc(void) { + // allocate a fresh arena slice + // note: careful with _mi_subproc as it may recurse into mi_tld and meta_page_zalloc again.. (same with _mi_os_numa_node()...) + mi_memid_t memid; + uint8_t* base = (uint8_t*)_mi_arenas_alloc_aligned(mi_heap_main(), MI_META_PAGE_SIZE, MI_META_PAGE_ALIGN, 0, + true /* commit*/, (MI_SECURE==0) /* allow large? */, + NULL /* req arena */, 0 /* thread_seq */, -1 /* numa node */, &memid); + if (base == NULL) return NULL; + mi_assert_internal(_mi_is_aligned(base,MI_META_PAGE_ALIGN)); + if (!memid.initially_zero) { + _mi_memzero_aligned(base, MI_ARENA_SLICE_SIZE); + } + + // guard pages + #if MI_SECURE >= 1 + _mi_os_secure_guard_page_set_at(base, memid); + _mi_os_secure_guard_page_set_before(base + MI_META_PAGE_SIZE, memid); + #endif + + // initialize the page and free block bitmap + mi_meta_page_t* mpage = (mi_meta_page_t*)(base + _mi_os_secure_guard_page_size()); + mpage->memid = memid; + mi_bbitmap_init(&mpage->blocks_free, MI_META_BLOCKS_PER_PAGE, true /* already_zero */); + const size_t mpage_size = offsetof(mi_meta_page_t,blocks_free) + mi_bbitmap_size(MI_META_BLOCKS_PER_PAGE, NULL); + const size_t info_blocks = _mi_divide_up(mpage_size,MI_META_BLOCK_SIZE); + const size_t guard_blocks = _mi_divide_up(_mi_os_secure_guard_page_size(), MI_META_BLOCK_SIZE); + mi_assert_internal(info_blocks + 2*guard_blocks < MI_META_BLOCKS_PER_PAGE); + mi_bbitmap_unsafe_setN(&mpage->blocks_free, info_blocks + guard_blocks, MI_META_BLOCKS_PER_PAGE - info_blocks - 2*guard_blocks); + + // push atomically in front of the meta page list + // (note: there is no ABA issue since we never free meta-pages) + mi_meta_page_t* old = mi_atomic_load_ptr_acquire(mi_meta_page_t,&mi_meta_pages); + do { + mi_atomic_store_ptr_release(mi_meta_page_t, &mpage->next, old); + } while(!mi_atomic_cas_ptr_weak_acq_rel(mi_meta_page_t,&mi_meta_pages,&old,mpage)); + return mpage; +} + + +// allocate meta-data +mi_decl_noinline void* _mi_meta_zalloc( size_t size, mi_memid_t* pmemid ) +{ + mi_assert_internal(pmemid != NULL); + size = _mi_align_up(size,MI_META_BLOCK_SIZE); + if (size == 0 || size > MI_META_MAX_SIZE) return NULL; + const size_t block_count = _mi_divide_up(size,MI_META_BLOCK_SIZE); + mi_assert_internal(block_count > 0 && block_count < MI_BCHUNK_BITS); + mi_meta_page_t* mpage0 = mi_atomic_load_ptr_acquire(mi_meta_page_t,&mi_meta_pages); + mi_meta_page_t* mpage = 
mpage0; + while (mpage != NULL) { + size_t block_idx; + if (mi_bbitmap_try_find_and_clearN(&mpage->blocks_free, block_count, 0, &block_idx)) { + // found and claimed `block_count` blocks + *pmemid = _mi_memid_create_meta(mpage, block_idx, block_count); + return mi_meta_block_start(mpage,block_idx); + } + else { + mpage = mi_meta_page_next(mpage); + } + } + // failed to find space in existing pages + if (mi_atomic_load_ptr_acquire(mi_meta_page_t,&mi_meta_pages) != mpage0) { + // the page list was updated by another thread in the meantime, retry + return _mi_meta_zalloc(size,pmemid); + } + // otherwise, allocate a fresh metapage and try once more + mpage = mi_meta_page_zalloc(); + if (mpage != NULL) { + size_t block_idx; + if (mi_bbitmap_try_find_and_clearN(&mpage->blocks_free, block_count, 0, &block_idx)) { + // found and claimed `block_count` blocks + *pmemid = _mi_memid_create_meta(mpage, block_idx, block_count); + return mi_meta_block_start(mpage,block_idx); + } + } + // if all this failed, allocate from the OS + return _mi_os_alloc(size, pmemid); +} + +// free meta-data +mi_decl_noinline void _mi_meta_free(void* p, size_t size, mi_memid_t memid) { + if (p==NULL) return; + if (memid.memkind == MI_MEM_META) { + mi_assert_internal(_mi_divide_up(size, MI_META_BLOCK_SIZE) == memid.mem.meta.block_count); + const size_t block_count = memid.mem.meta.block_count; + const size_t block_idx = memid.mem.meta.block_index; + mi_meta_page_t* mpage = (mi_meta_page_t*)memid.mem.meta.meta_page; + mi_assert_internal(mi_meta_page_of_ptr(p,NULL) == mpage); + mi_assert_internal(block_idx + block_count <= MI_META_BLOCKS_PER_PAGE); + mi_assert_internal(mi_bbitmap_is_clearN(&mpage->blocks_free, block_idx, block_count)); + // we zero on free (and on the initial page allocation) so we don't need a "dirty" map + _mi_memzero_aligned(mi_meta_block_start(mpage, block_idx), block_count*MI_META_BLOCK_SIZE); + mi_bbitmap_setN(&mpage->blocks_free, block_idx, block_count); + } + else { + _mi_arenas_free(p,size,memid); + } +} + +// used for debug output +bool _mi_meta_is_meta_page(void* p) +{ + mi_meta_page_t* mpage0 = mi_atomic_load_ptr_acquire(mi_meta_page_t, &mi_meta_pages); + mi_meta_page_t* mpage = mpage0; + while (mpage != NULL) { + if ((void*)mpage == p) return true; + mpage = mi_meta_page_next(mpage); + } + return false; +} diff --git a/3rd/mimalloc-3.2.6/src/arena.c b/3rd/mimalloc-3.2.6/src/arena.c new file mode 100644 index 00000000..1e11a27a --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/arena.c @@ -0,0 +1,2311 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2025, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- +"Arenas" are fixed area's of OS memory from which we can allocate +large blocks (>= MI_ARENA_MIN_BLOCK_SIZE, 64KiB). +In contrast to the rest of mimalloc, the arenas are shared between +threads and need to be accessed using atomic operations. + +Arenas are also used to for huge OS page (1GiB) reservations or for reserving +OS memory upfront which can be improve performance or is sometimes needed +on embedded devices. 
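As described in this header comment, an embedder can reserve arena memory upfront; a minimal sketch using the public reservation API (hypothetical embedder code, signatures as declared in `mimalloc.h`):

```
/* hypothetical embedder code -- reserving arenas upfront */
#include <mimalloc.h>

static void reserve_upfront(void) {
  // reserve a 1 GiB arena of regular OS memory, committed on demand
  if (mi_reserve_os_memory(1024ULL * 1024 * 1024, /*commit*/ false, /*allow_large*/ true) != 0) {
    // reservation failed; mimalloc keeps allocating arenas on demand instead
  }
  // or reserve huge (1 GiB) OS pages interleaved over the available NUMA nodes:
  // mi_reserve_huge_os_pages_interleave(4, 0, 2000);  // 4 pages, all nodes, 2s timeout
}
```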
We can also employ this with WASI or `sbrk` systems +to reserve large arenas upfront and be able to reuse the memory more effectively. + +The arena allocation needs to be thread safe and we use an atomic bitmap to allocate. +-----------------------------------------------------------------------------*/ + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" +#include "bitmap.h" + +/* ----------------------------------------------------------- + Arena id's +----------------------------------------------------------- */ + +mi_arena_id_t _mi_arena_id_none(void) { + return NULL; +} + +mi_arena_t* _mi_arena_from_id(mi_arena_id_t id) { + return (mi_arena_t*)id; +} + + +static bool mi_arena_id_is_suitable(mi_arena_t* arena, mi_arena_t* req_arena) { + return ((arena == req_arena) || // they match, + (req_arena == NULL && !arena->is_exclusive)); // or the arena is not exclusive, and we didn't request a specific one +} + +bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_t* request_arena) { + if (memid.memkind == MI_MEM_ARENA) { + return mi_arena_id_is_suitable(memid.mem.arena.arena, request_arena); + } + else { + return mi_arena_id_is_suitable(NULL, request_arena); + } +} + +size_t mi_arenas_get_count(mi_subproc_t* subproc) { + return mi_atomic_load_relaxed(&subproc->arena_count); +} + +mi_arena_t* mi_arena_from_index(mi_subproc_t* subproc, size_t idx) { + mi_assert_internal(idx < mi_arenas_get_count(subproc)); + return mi_atomic_load_ptr_relaxed(mi_arena_t, &subproc->arenas[idx]); +} + +static size_t mi_arena_info_slices(mi_arena_t* arena) { + return arena->info_slices; +} + +#if MI_DEBUG > 1 +static bool mi_heap_has_page(mi_heap_t* heap, mi_arena_t* arena, mi_page_t* page) { + mi_assert(arena->arena_idx < MI_MAX_ARENAS); + mi_arena_pages_t* arena_pages = heap->arena_pages[arena->arena_idx]; + return (page->memid.memkind == MI_MEM_ARENA && + page->memid.mem.arena.arena == arena && + arena_pages != NULL && + mi_bitmap_is_setN(arena_pages->pages, page->memid.mem.arena.slice_index, 1)); +} +#endif + +size_t mi_arena_min_alignment(void) { + return MI_ARENA_SLICE_ALIGN; +} + +mi_decl_nodiscard static bool mi_arena_commit(mi_arena_t* arena, void* start, size_t size, bool* is_zero, size_t already_committed) { + if (arena != NULL && arena->commit_fun != NULL) { + return (*arena->commit_fun)(true, start, size, is_zero, arena->commit_fun_arg); + } + else if (already_committed > 0) { + return _mi_os_commit_ex(start, size, is_zero, already_committed); + } + else { + return _mi_os_commit(start, size, is_zero); + } +} + + + +/* ----------------------------------------------------------- + Util +----------------------------------------------------------- */ + + +// Size of an arena +static size_t mi_arena_size(mi_arena_t* arena) { + return mi_size_of_slices(arena->slice_count); +} + +// Start of the arena memory area +static uint8_t* mi_arena_start(mi_arena_t* arena) { + return ((uint8_t*)arena); +} + +// Start of a slice +uint8_t* mi_arena_slice_start(mi_arena_t* arena, size_t slice_index) { + return (mi_arena_start(arena) + mi_size_of_slices(slice_index)); +} + +// Arena area +void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { + if (size != NULL) *size = 0; + mi_arena_t* arena = _mi_arena_from_id(arena_id); + if (arena == NULL) return NULL; + if (size != NULL) { *size = mi_size_of_slices(arena->slice_count); } + return mi_arena_start(arena); +} + + +// Create an arena memid +static mi_memid_t mi_memid_create_arena(mi_arena_t* arena, size_t slice_index, size_t 
slice_count) { + mi_assert_internal(slice_index < UINT32_MAX); + mi_assert_internal(slice_count < UINT32_MAX); + mi_assert_internal(slice_count > 0); + mi_assert_internal(slice_index < arena->slice_count); + mi_memid_t memid = _mi_memid_create(MI_MEM_ARENA); + memid.mem.arena.arena = arena; + memid.mem.arena.slice_index = (uint32_t)slice_index; + memid.mem.arena.slice_count = (uint32_t)slice_count; + return memid; +} + +// get the arena and slice span +static mi_arena_t* mi_arena_from_memid(mi_memid_t memid, size_t* slice_index, size_t* slice_count) { + mi_assert_internal(memid.memkind == MI_MEM_ARENA); + mi_arena_t* arena = memid.mem.arena.arena; + if (slice_index!=NULL) { *slice_index = memid.mem.arena.slice_index; } + if (slice_count!=NULL) { *slice_count = memid.mem.arena.slice_count; } + return arena; +} + +static size_t mi_page_full_size(mi_page_t* page) { + if (page->memid.memkind == MI_MEM_ARENA) { + return page->memid.mem.arena.slice_count * MI_ARENA_SLICE_SIZE; + } + else if (mi_memid_is_os(page->memid) || page->memid.memkind == MI_MEM_EXTERNAL) { + mi_assert_internal((uint8_t*)page->memid.mem.os.base <= (uint8_t*)page); + const ptrdiff_t presize = (uint8_t*)page - (uint8_t*)page->memid.mem.os.base; + mi_assert_internal((ptrdiff_t)page->memid.mem.os.size >= presize); + return (presize > (ptrdiff_t)page->memid.mem.os.size ? 0 : page->memid.mem.os.size - presize); + } + else { + return 0; + } +} + + +/* ----------------------------------------------------------- + Arena Allocation +----------------------------------------------------------- */ + +static mi_decl_noinline void* mi_arena_try_alloc_at( + mi_arena_t* arena, size_t slice_count, bool commit, size_t tseq, mi_memid_t* memid) +{ + size_t slice_index; + if (!mi_bbitmap_try_find_and_clearN(arena->slices_free, slice_count, tseq, &slice_index)) return NULL; + + // claimed it! + void* p = mi_arena_slice_start(arena, slice_index); + *memid = mi_memid_create_arena(arena, slice_index, slice_count); + memid->is_pinned = arena->memid.is_pinned; + + // set the dirty bits and track which slices become accessible + size_t touched_slices = slice_count; + if (arena->memid.initially_zero) { + size_t already_dirty = 0; + memid->initially_zero = mi_bitmap_setN(arena->slices_dirty, slice_index, slice_count, &already_dirty); + mi_assert_internal(already_dirty <= touched_slices); + touched_slices -= already_dirty; + } + + // set commit state + if (commit) { + // commit requested, but the range may not be committed as a whole: ensure it is committed now + const size_t already_committed = mi_bitmap_popcountN(arena->slices_committed, slice_index, slice_count); + if (already_committed < slice_count) { + // not all committed, try to commit now + bool commit_zero = false; + if (!_mi_os_commit_ex(p, mi_size_of_slices(slice_count), &commit_zero, mi_size_of_slices(slice_count - already_committed))) { + // if the commit fails, release ownership, and return NULL; + // note: this does not roll back dirty bits but that is ok. 
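      // (editorial note: the dirty bits are purely conservative -- a set bit only
      //  means the slice can no longer be assumed to be zero-initialized -- so
      //  leaving them set for slices we failed to hand out is harmless; at worst
      //  a later allocation re-zeroes memory that was already zero.)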
+ mi_bbitmap_setN(arena->slices_free, slice_index, slice_count); + return NULL; + } + if (commit_zero) { + memid->initially_zero = true; + } + + // set the commit bits + mi_bitmap_setN(arena->slices_committed, slice_index, slice_count, NULL); + + // committed + #if MI_DEBUG > 1 + if (memid->initially_zero) { + if (!mi_mem_is_zero(p, mi_size_of_slices(slice_count))) { + _mi_error_message(EFAULT, "internal error: arena allocation was not zero-initialized!\n"); + memid->initially_zero = false; + } + } + #endif + } + else { + // already fully committed. + _mi_os_reuse(p, mi_size_of_slices(slice_count)); + // if the OS has overcommit, and this is the first time we access these pages, then + // count the commit now (as at arena reserve we didn't count those commits as these are on-demand) + if (_mi_os_has_overcommit() && touched_slices > 0) { + mi_subproc_stat_increase( arena->subproc, committed, mi_size_of_slices(touched_slices)); + } + } + + mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count)); + memid->initially_committed = true; + + // tool support + if (memid->initially_zero) { + mi_track_mem_defined(p, slice_count * MI_ARENA_SLICE_SIZE); + } + else { + mi_track_mem_undefined(p, slice_count * MI_ARENA_SLICE_SIZE); + } + } + else { + // no need to commit, but check if it is already fully committed + memid->initially_committed = mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count); + if (!memid->initially_committed) { + // partly committed.. adjust stats + size_t already_committed_count = 0; + mi_bitmap_setN(arena->slices_committed, slice_index, slice_count, &already_committed_count); + mi_bitmap_clearN(arena->slices_committed, slice_index, slice_count); + mi_subproc_stat_decrease(arena->subproc, committed, mi_size_of_slices(already_committed_count)); + } + } + + mi_assert_internal(mi_bbitmap_is_clearN(arena->slices_free, slice_index, slice_count)); + if (commit) { mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count)); } + if (commit) { mi_assert_internal(memid->initially_committed); } + mi_assert_internal(mi_bitmap_is_setN(arena->slices_dirty, slice_index, slice_count)); + + return p; +} + + +static int mi_reserve_os_memory_ex2(mi_subproc_t* subproc, size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id); + +// try to reserve a fresh arena space +static bool mi_arena_reserve(mi_subproc_t* subproc, size_t req_size, bool allow_large, mi_arena_id_t* arena_id) +{ + const size_t arena_count = mi_arenas_get_count(subproc); + if (arena_count > (MI_MAX_ARENAS - 4)) return false; + + // calc reserve + size_t arena_reserve = mi_option_get_size(mi_option_arena_reserve); + if (arena_reserve == 0) return false; + + if (!_mi_os_has_virtual_reserve()) { + arena_reserve = arena_reserve/4; // be conservative if virtual reserve is not supported (for WASM for example) + } + arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_SLICE_SIZE); + + if (arena_count >= 1 && arena_count <= 128) { + // scale up the arena sizes exponentially every 8 entries + const size_t multiplier = (size_t)1 << _mi_clamp(arena_count/8, 0, 16); + size_t reserve = 0; + if (!mi_mul_overflow(multiplier, arena_reserve, &reserve)) { + arena_reserve = reserve; + } + } + + // try to accommodate the requested size for huge allocations + if (arena_reserve < req_size) { + arena_reserve = _mi_align_up(req_size + MI_ARENA_MAX_CHUNK_OBJ_SIZE, MI_ARENA_MAX_CHUNK_OBJ_SIZE); // over-reserve for meta-info + } + + // check arena bounds + 
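  // Worked example (editorial note): with the exponential scaling above the
  // reserve size grows geometrically as arenas accumulate -- if
  // `mi_option_arena_reserve` is R, then arenas 1..8 reserve R each, arenas
  // 9..16 reserve 2*R, arenas 17..24 reserve 4*R, and so on (the multiplier is
  // capped at 2^16), before being clamped to the
  // [MI_ARENA_MIN_SIZE, MI_ARENA_MAX_SIZE] bounds right below.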
const size_t min_reserve = MI_ARENA_MIN_SIZE; + const size_t max_reserve = MI_ARENA_MAX_SIZE; // 16 GiB + if (arena_reserve < min_reserve) { + arena_reserve = min_reserve; + } + else if (arena_reserve > max_reserve) { + arena_reserve = max_reserve; + } + + // should be able to at least handle the current allocation size + if (arena_reserve < req_size) return false; + + // commit eagerly? + bool arena_commit = false; + const bool overcommit = _mi_os_has_overcommit(); + if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = overcommit; } + else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; } + + // on an OS with overcommit (Linux) we don't count the commit yet as it is on-demand. Once a slice + // is actually allocated for the first time it will be counted. + const bool adjust = (overcommit && arena_commit); + if (adjust) { mi_subproc_stat_adjust_decrease( subproc, committed, arena_reserve); } + // and try to reserve the arena + int err = mi_reserve_os_memory_ex2(subproc, arena_reserve, arena_commit, allow_large, false /* exclusive? */, arena_id); + if (err != 0) { + if (adjust) { mi_subproc_stat_adjust_increase( subproc, committed, arena_reserve); } // roll back + // failed to allocate: try a smaller size arena as fallback? + const size_t small_arena_reserve = 4 * MI_ARENA_MIN_SIZE; // 128 MiB (or 32 MiB on 32-bit) + if (arena_reserve > small_arena_reserve && small_arena_reserve > req_size) { + // try again + if (adjust) { mi_subproc_stat_adjust_decrease(subproc, committed, small_arena_reserve); } + err = mi_reserve_os_memory_ex2(subproc, small_arena_reserve, arena_commit, allow_large, false /* exclusive? */, arena_id); + if (err != 0 && adjust) { mi_subproc_stat_adjust_increase( subproc, committed, small_arena_reserve); } // roll back + } + } + return (err==0); +} + + + + +/* ----------------------------------------------------------- + Arena iteration +----------------------------------------------------------- */ + +static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_t* req_arena, bool match_numa, int numa_node, bool allow_pinned) { + if (!allow_pinned && arena->memid.is_pinned) return false; + if (!mi_arena_id_is_suitable(arena, req_arena)) return false; + if (req_arena == NULL) { // if not specific, check numa affinity + const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node); + if (match_numa) { if (!numa_suitable) return false; } + else { if (numa_suitable) return false; } + } + return true; +} + +// determine the start of search; important to keep heaps and threads +// into their own memory regions to reduce contention. +static size_t mi_arena_start_idx(mi_heap_t* heap, size_t tseq, size_t arena_cycle) { + const size_t hseq = heap->heap_seq; + const size_t hcount = mi_atomic_load_relaxed(&heap->subproc->heap_count); + if (arena_cycle <= 1) return 0; + if (hseq==0 || hcount<=1) return (tseq % arena_cycle); // common for single heap programs + + // spread heaps evenly among arena's, and then evenly for threads in their fraction + size_t start; + mi_assert_internal(arena_cycle <= 0x8FF); // prevent overflow on 32-bit + const size_t frac = (arena_cycle * 256) / hcount; // fraction in the arena_cycle; at most: arena_cycle * 0x100 + if (frac==0) { + // many heaps (> 256 per arena) + start = (hseq % arena_cycle); + } + else { + const size_t hspot = (hseq % hcount); + start = (frac * hspot) / 256; + if (frac >= 512) { // at least 2 arena's per heap? 
+ start = start + (tseq % (frac/256)); + } + } + mi_assert_internal(start < arena_cycle); + return start; +} + +#define mi_forall_arenas(heap, req_arena, tseq, name_arena) { \ + const size_t _arena_count = mi_arenas_get_count(heap->subproc); \ + const size_t _arena_cycle = (_arena_count == 0 ? 0 : _arena_count - 1); /* first search the arenas below the last one */ \ + /* always start searching in the arena's below the max */ \ + const size_t _start = mi_arena_start_idx(heap,tseq,_arena_cycle); \ + for (size_t _i = 0; _i < _arena_count; _i++) { \ + mi_arena_t* name_arena; \ + if (req_arena != NULL) { \ + name_arena = req_arena; /* if there is a specific req_arena, only search that one */\ + if (_i > 0) break; /* only once */ \ + } \ + else { \ + size_t _idx; \ + if (_i < _arena_cycle) { \ + _idx = _i + _start; \ + if (_idx >= _arena_cycle) { _idx -= _arena_cycle; } /* adjust so we rotate through the cycle */ \ + } \ + else { \ + _idx = _i; /* remaining arena's after the cycle */ \ + } \ + name_arena = mi_arena_from_index(heap->subproc,_idx); \ + } \ + if (name_arena != NULL) \ + { + +#define mi_forall_arenas_end() \ + } \ + } \ + } + +#define mi_forall_suitable_arenas(heap, req_arena, tseq, match_numa, numa_node, allow_large, name_arena) \ + mi_forall_arenas(heap, req_arena,tseq,name_arena) { \ + if (mi_arena_is_suitable(name_arena, req_arena, match_numa, numa_node, allow_large)) { \ + +#define mi_forall_suitable_arenas_end() \ + }} \ + mi_forall_arenas_end() + +/* ----------------------------------------------------------- + Arena allocation +----------------------------------------------------------- */ + +// allocate slices from the arenas +static mi_decl_noinline void* mi_arenas_try_find_free( + mi_heap_t* heap, size_t slice_count, size_t alignment, + bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, int numa_node, mi_memid_t* memid) +{ + // mi_assert_internal(slice_count <= mi_slice_count_of_size(MI_ARENA_MAX_CHUNK_OBJ_SIZE)); + mi_assert(alignment <= MI_ARENA_SLICE_ALIGN); + if (alignment > MI_ARENA_SLICE_ALIGN) return NULL; + + // search arena's + mi_forall_suitable_arenas(heap, req_arena, tseq, true /* only numa matching */, numa_node, allow_large, arena) + { + void* p = mi_arena_try_alloc_at(arena, slice_count, commit, tseq, memid); + if (p != NULL) return p; + } + mi_forall_suitable_arenas_end(); + if (numa_node < 0) return NULL; + + // search again but now regardless of preferred numa affinity + mi_forall_suitable_arenas(heap, req_arena, tseq, false /* numa non-matching now */, numa_node, allow_large, arena) + { + void* p = mi_arena_try_alloc_at(arena, slice_count, commit, tseq, memid); + if (p != NULL) return p; + } + mi_forall_suitable_arenas_end(); + return NULL; +} + +// Allocate slices from the arena's -- potentially allocating a fresh arena +static mi_decl_noinline void* mi_arenas_try_alloc( + mi_heap_t* heap, + size_t slice_count, size_t alignment, + bool commit, bool allow_large, + mi_arena_t* req_arena, size_t tseq, int numa_node, mi_memid_t* memid) +{ + // mi_assert(slice_count <= MI_ARENA_MAX_CHUNK_OBJ_SLICES); + mi_assert(alignment <= MI_ARENA_SLICE_ALIGN); + void* p; + + // not too large? + if (slice_count * MI_ARENA_SLICE_SIZE > MI_ARENA_MAX_SIZE) return NULL; + + // try to find free slices in the arena's + p = mi_arenas_try_find_free(heap, slice_count, alignment, commit, allow_large, req_arena, tseq, numa_node, memid); + if (p != NULL) return p; + + // did we need a specific arena? 
+ if (req_arena != NULL) return NULL; + + // don't create arena's while preloading (todo: or should we?) + if (_mi_preloading()) return NULL; + + // otherwise, try to reserve a new arena -- but one thread at a time.. (todo: allow 2 or 4 to reduce contention?) + mi_subproc_t* const subproc = heap->subproc; + const size_t arena_count = mi_arenas_get_count(subproc); + mi_lock(&subproc->arena_reserve_lock) { + if (arena_count == mi_arenas_get_count(subproc)) { + // we are the first to enter the lock, reserve a fresh arena + mi_arena_id_t arena_id = 0; + mi_arena_reserve(subproc, mi_size_of_slices(slice_count), allow_large, &arena_id); + } + else { + // another thread already reserved a new arena + } + } + // try once more to allocate in the new arena + mi_assert_internal(req_arena == NULL); + p = mi_arenas_try_find_free(heap, slice_count, alignment, commit, allow_large, req_arena, tseq, numa_node, memid); + if (p != NULL) return p; + + return NULL; +} + +// Allocate from the OS (if allowed) +static void* mi_arena_os_alloc_aligned( + size_t size, size_t alignment, size_t align_offset, + bool commit, bool allow_large, + mi_arena_id_t req_arena_id, mi_memid_t* memid) +{ + // if we cannot use OS allocation, return NULL + if (mi_option_is_enabled(mi_option_disallow_os_alloc) || req_arena_id != _mi_arena_id_none()) { + errno = ENOMEM; + return NULL; + } + + if (align_offset > 0) { + return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid); + } + else { + return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid); + } +} + + +// Allocate large sized memory +void* _mi_arenas_alloc_aligned( mi_heap_t* heap, + size_t size, size_t alignment, size_t align_offset, + bool commit, bool allow_large, + mi_arena_t* req_arena, size_t tseq, int numa_node, mi_memid_t* memid) +{ + mi_assert_internal(memid != NULL); + mi_assert_internal(size > 0); + + // try to allocate in an arena if the alignment is small enough and the object is not too small (as for theap meta data) + if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) && // is arena allocation allowed? + size >= MI_ARENA_MIN_OBJ_SIZE && size <= MI_ARENA_MAX_OBJ_SIZE && // and not too small or too large + alignment <= MI_ARENA_SLICE_ALIGN && align_offset == 0) // and good alignment + { + const size_t slice_count = mi_slice_count_of_size(size); + void* p = mi_arenas_try_alloc(heap, slice_count, alignment, commit, allow_large, req_arena, tseq, numa_node, memid); + if (p != NULL) return p; + } + + // fall back to the OS + void* p = mi_arena_os_alloc_aligned(size, alignment, align_offset, commit, allow_large, req_arena, memid); + return p; +} + +void* _mi_arenas_alloc(mi_heap_t* heap, size_t size, bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, int numa_node, mi_memid_t* memid) +{ + return _mi_arenas_alloc_aligned(heap, size, MI_ARENA_SLICE_SIZE, 0, commit, allow_large, req_arena, tseq, numa_node, memid); +} + + + +/* ----------------------------------------------------------- + Arena page allocation +----------------------------------------------------------- */ + +// release ownership of a page. This may free the page if all blocks were concurrently +// freed in the meantime. Returns true if the page was freed. 
+static bool mi_abandoned_page_unown(mi_page_t* page, mi_theap_t* current_theap) { + mi_assert_internal(mi_page_is_owned(page)); + mi_assert_internal(mi_page_is_abandoned(page)); + mi_assert_internal(_mi_thread_id()==current_theap->tld->thread_id); + mi_thread_free_t tf_new; + mi_thread_free_t tf_old = mi_atomic_load_relaxed(&page->xthread_free); + do { + mi_assert_internal(mi_tf_is_owned(tf_old)); + while mi_unlikely(mi_tf_block(tf_old) != NULL) { + _mi_page_free_collect(page, false); // update used + if (mi_page_all_free(page)) { // it may become free just before unowning it + _mi_arenas_page_unabandon(page, current_theap); + _mi_arenas_page_free(page, current_theap); + return true; + } + tf_old = mi_atomic_load_relaxed(&page->xthread_free); + } + mi_assert_internal(mi_tf_block(tf_old)==NULL); + tf_new = mi_tf_create(NULL, false); + } while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tf_old, tf_new)); + return false; +} + + +static bool mi_arena_try_claim_abandoned(size_t slice_index, mi_arena_t* arena, bool* keep_abandoned) { + // found an abandoned page of the right size + mi_page_t* const page = (mi_page_t*)mi_arena_slice_start(arena, slice_index); + // can we claim ownership? + if (!mi_page_claim_ownership(page)) { + // there was a concurrent free that reclaims this page .. + // we need to keep it in the abandoned map as the free will call `mi_arena_page_unabandon`, + // and wait for readers (us!) to finish. This is why it is very important to set the abandoned + // bit again (or otherwise the unabandon will never stop waiting). + *keep_abandoned = true; + return false; + } + else { + // yes, we can reclaim it, keep the abandoned map entry clear + *keep_abandoned = false; + return true; + } +} + +// allocate initial arena_pages from the main heap +static mi_arena_pages_t* mi_arena_pages_alloc(mi_arena_t* arena); + +static mi_arena_pages_t* mi_heap_arena_pages(mi_heap_t* heap, mi_arena_t* arena) { + mi_assert_internal(arena!=NULL); + mi_assert_internal(heap!=NULL); + mi_assert(arena->arena_idx < MI_MAX_ARENAS); + return mi_atomic_load_relaxed(&heap->arena_pages[arena->arena_idx]); +} + +static mi_arena_t* mi_page_arena_pages(mi_page_t* page, size_t* slice_index, size_t* slice_count, mi_arena_pages_t** parena_pages) { + // todo: maybe store the arena* directly in the page? + mi_assert_internal(mi_page_is_owned(page)); + mi_arena_t* const arena = mi_arena_from_memid(page->memid, slice_index, slice_count); + mi_assert_internal(arena != NULL); + if (parena_pages != NULL) { + mi_arena_pages_t* const arena_pages = mi_heap_arena_pages(mi_page_heap(page), arena); + mi_assert_internal(arena_pages != NULL); + mi_assert_internal(slice_index==NULL || mi_bitmap_is_set(arena_pages->pages, *slice_index)); + *parena_pages = arena_pages; + } + return arena; +} + +static mi_arena_pages_t* mi_heap_ensure_arena_pages(mi_heap_t* heap, mi_arena_t* arena) { + mi_assert_internal(arena!=NULL); + mi_assert_internal(heap!=NULL); + mi_assert(arena->arena_idx < MI_MAX_ARENAS); + mi_arena_pages_t* arena_pages = mi_heap_arena_pages(heap, arena); + if (arena_pages==NULL) { + mi_lock(&heap->arena_pages_lock) { + arena_pages = mi_atomic_load_acquire(&heap->arena_pages[arena->arena_idx]); + if (arena_pages == NULL) { // still NULL? + if (_mi_is_heap_main(heap)) { + // the page info for the main heap is always allocated as part of an arena + arena_pages = &arena->pages_main; + } + else { + // always allocate the arena pages info from the main heap + // todo: allocate into the current arena? 
+ arena_pages = mi_arena_pages_alloc(arena); + } + mi_atomic_store_release(&heap->arena_pages[arena->arena_idx], arena_pages); + } + } + } + if (_mi_is_heap_main(heap)) { mi_assert(arena_pages != NULL); } // can never fail + return arena_pages; +} + +static mi_page_t* mi_arenas_page_try_find_abandoned(mi_theap_t* theap, size_t slice_count, size_t block_size) +{ + mi_heap_t* const heap = theap->heap; + const size_t tseq = theap->tld->thread_seq; + mi_arena_t* const req_arena = heap->exclusive_arena; + + MI_UNUSED(slice_count); + const size_t bin = _mi_bin(block_size); + mi_assert_internal(bin < MI_BIN_COUNT); + + // any abandoned in our size class? + mi_assert_internal(heap != NULL); + if (mi_atomic_load_relaxed(&heap->abandoned_count[bin]) == 0) { + return NULL; + } + + // search arena's + const bool allow_large = true; + const int any_numa = -1; + const bool match_numa = true; + mi_forall_suitable_arenas(heap, req_arena, tseq, match_numa, any_numa, allow_large, arena) + { + mi_arena_pages_t* const arena_pages = mi_heap_arena_pages(heap, arena); + if (arena_pages != NULL) { + size_t slice_index; + mi_bitmap_t* const bitmap = arena_pages->pages_abandoned[bin]; + + if (mi_bitmap_try_find_and_claim(bitmap, tseq, &slice_index, &mi_arena_try_claim_abandoned, arena)) { + // found an abandoned page of the right size + // and claimed ownership. + mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index); + mi_assert_internal(mi_page_is_owned(page)); + mi_assert_internal(mi_page_is_abandoned(page)); + mi_assert_internal(mi_heap_has_page(heap, arena, page)); + mi_atomic_decrement_relaxed(&heap->abandoned_count[bin]); + mi_theap_stat_decrease(theap, pages_abandoned, 1); + mi_theap_stat_counter_increase(theap, pages_reclaim_on_alloc, 1); + + _mi_page_free_collect(page, false); // update `used` count + mi_assert_internal(mi_bbitmap_is_clearN(arena->slices_free, slice_index, slice_count)); + mi_assert_internal(page->slice_committed > 0 || mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count)); + mi_assert_internal(mi_bitmap_is_setN(arena->slices_dirty, slice_index, slice_count)); + mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); + mi_assert_internal(_mi_ptr_page(page)==page); + mi_assert_internal(_mi_ptr_page(mi_page_start(page))==page); + mi_assert_internal(mi_page_block_size(page) == block_size); + mi_assert_internal(!mi_page_is_full(page)); + return page; + } + } + } + mi_forall_suitable_arenas_end(); + return NULL; +} + + +// Allocate a fresh page +static mi_page_t* mi_arenas_page_alloc_fresh(mi_theap_t* theap, size_t slice_count, size_t block_size, size_t block_alignment, bool commit) +{ + const bool allow_large = (MI_SECURE < 2); // 2 = guard page at end of each arena page + const bool os_align = (block_alignment > MI_PAGE_MAX_OVERALLOC_ALIGN); + const size_t page_alignment = MI_ARENA_SLICE_ALIGN; + + mi_heap_t* const heap = theap->heap; + mi_tld_t* const tld = theap->tld; + mi_arena_t* const req_arena = heap->exclusive_arena; + const int numa_node = (heap->numa_node >= 0 ? heap->numa_node : tld->numa_node); + + + // try to allocate from free space in arena's + mi_memid_t memid = _mi_memid_none(); + mi_page_t* page = NULL; + const size_t alloc_size = mi_size_of_slices(slice_count); + if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) && // allowed to allocate from arena's? 
+ !os_align && // not large alignment + slice_count <= (MI_ARENA_MAX_OBJ_SIZE / MI_ARENA_SLICE_SIZE)) // and not too large + { + page = (mi_page_t*)mi_arenas_try_alloc(heap, slice_count, page_alignment, commit, allow_large, req_arena, tld->thread_seq, numa_node, &memid); + if (page != NULL) { + mi_arena_pages_t* const arena_pages = mi_heap_ensure_arena_pages(heap, memid.mem.arena.arena); + if (arena_pages==NULL) { + _mi_arenas_free(page, mi_size_of_slices(slice_count), page->memid); // roll back + page = NULL; + } + else { + mi_assert_internal(mi_bitmap_is_clearN(arena_pages->pages, memid.mem.arena.slice_index, memid.mem.arena.slice_count)); + mi_bitmap_set(arena_pages->pages, memid.mem.arena.slice_index); + } + } + } + + // otherwise fall back to the OS + if (page == NULL) { + if (os_align) { + // note: slice_count already includes the page + mi_assert_internal(slice_count >= mi_slice_count_of_size(block_size) + mi_slice_count_of_size(page_alignment)); + page = (mi_page_t*)mi_arena_os_alloc_aligned(alloc_size, block_alignment, page_alignment /* align offset */, commit, allow_large, req_arena, &memid); + } + else { + page = (mi_page_t*)mi_arena_os_alloc_aligned(alloc_size, page_alignment, 0 /* align offset */, commit, allow_large, req_arena, &memid); + } + } + + if (page == NULL) return NULL; + mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); + mi_assert_internal(!os_align || _mi_is_aligned((uint8_t*)page + page_alignment, block_alignment)); + + // guard page at the end of mimalloc page? + #if MI_SECURE < 2 + const size_t page_noguard_size = alloc_size; + #else + mi_assert(alloc_size > _mi_os_secure_guard_page_size()); + const size_t page_noguard_size = alloc_size - _mi_os_secure_guard_page_size(); + if (memid.initially_committed) { + _mi_os_secure_guard_page_set_at((uint8_t*)page + page_noguard_size, memid); + } + #endif + + // claimed free slices: initialize the page partly + if (!memid.initially_zero && memid.initially_committed) { + mi_track_mem_undefined(page, slice_count * MI_ARENA_SLICE_SIZE); + _mi_memzero_aligned(page, sizeof(*page)); + } + else if (memid.initially_committed) { + mi_track_mem_defined(page, slice_count * MI_ARENA_SLICE_SIZE); + } + #if MI_DEBUG > 1 + if (memid.initially_zero && memid.initially_committed) { + if (!mi_mem_is_zero(page, page_noguard_size)) { + _mi_error_message(EFAULT, "internal error: page memory was not zero initialized.\n"); + memid.initially_zero = false; + _mi_memzero_aligned(page, sizeof(*page)); + } + } + #endif + mi_assert(MI_PAGE_INFO_SIZE >= mi_page_info_size()); + + size_t block_start; + #if MI_GUARDED + // in a guarded build, we align pages with blocks a multiple of an OS page size, to the OS page size + // this ensures that all blocks in such pages are OS page size aligned (which is needed for the guard pages) + const size_t os_page_size = _mi_os_page_size(); + mi_assert_internal(MI_PAGE_ALIGN >= os_page_size); + if (!os_align && block_size % os_page_size == 0 && block_size > os_page_size /* at least 2 or more */ ) { + block_start = _mi_align_up(mi_page_info_size(), os_page_size); + } + else + #endif + if (os_align) { + block_start = MI_PAGE_ALIGN; + } + else if (_mi_is_power_of_two(block_size) && block_size <= MI_PAGE_MAX_START_BLOCK_ALIGN2) { + // naturally align power-of-2 blocks up to MI_PAGE_MAX_START_BLOCK_ALIGN2 size (4KiB) + block_start = _mi_align_up(mi_page_info_size(), block_size); + } + else if (block_size != 0 && (block_size % MI_PAGE_OSPAGE_BLOCK_ALIGN2) == 0) { + // also align large pages that are a multiple of 
MI_PAGE_OSPAGE_BLOCK_ALIGN2 (4KiB) + block_start = _mi_align_up(mi_page_info_size(), MI_PAGE_OSPAGE_BLOCK_ALIGN2); + } + else { + // otherwise start after the info + block_start = mi_page_info_size(); + } + const size_t reserved = (os_align ? 1 : (page_noguard_size - block_start) / block_size); + mi_assert_internal(reserved > 0 && reserved <= UINT16_MAX); + + // commit first block? + size_t commit_size = 0; + if (!memid.initially_committed) { + commit_size = _mi_align_up(block_start + block_size, MI_PAGE_MIN_COMMIT_SIZE); + if (commit_size > page_noguard_size) { commit_size = page_noguard_size; } + bool is_zero; + if mi_unlikely(!mi_arena_commit( mi_memid_arena(memid), page, commit_size, &is_zero, 0)) { + _mi_arenas_free(page, alloc_size, memid); + return NULL; + } + if (!memid.initially_zero && !is_zero) { + _mi_memzero_aligned(page, commit_size); + } + } + + // initialize + page->reserved = (uint16_t)reserved; + page->page_start = (uint8_t*)page + block_start; + page->block_size = block_size; + page->slice_committed = commit_size; + page->memid = memid; + page->free_is_zero = memid.initially_zero; + + // and own it + mi_page_claim_ownership(page); + + // register in the page map + if mi_unlikely(!_mi_page_map_register(page)) { + _mi_arenas_free( page, alloc_size, memid ); + return NULL; + } + + // stats + mi_theap_stat_increase(theap, pages, 1); + mi_theap_stat_increase(theap, page_bins[_mi_page_stats_bin(page)], 1); + + mi_assert_internal(_mi_ptr_page(page)==page); + mi_assert_internal(_mi_ptr_page(mi_page_start(page))==page); + mi_assert_internal(mi_page_block_size(page) == block_size); + mi_assert_internal(mi_page_is_abandoned(page)); + mi_assert_internal(mi_page_is_owned(page)); + + return page; +} + +// Allocate a regular small/medium/large page. +static mi_page_t* mi_arenas_page_regular_alloc(mi_theap_t* theap, size_t slice_count, size_t block_size) +{ + // 1. look for an abandoned page + mi_page_t* page = mi_arenas_page_try_find_abandoned(theap, slice_count, block_size); + if (page != NULL) { + return page; // return as abandoned + } + + // 2. find a free block, potentially allocating a new arena + const long commit_on_demand = mi_option_get(mi_option_page_commit_on_demand); + const bool commit = (slice_count <= mi_slice_count_of_size(MI_PAGE_MIN_COMMIT_SIZE) || // always commit small pages + (commit_on_demand == 2 && _mi_os_has_overcommit()) || (commit_on_demand == 0)); + page = mi_arenas_page_alloc_fresh(theap, slice_count, block_size, 1, commit); + if (page == NULL) return NULL; + + mi_assert_internal(page->memid.memkind != MI_MEM_ARENA || page->memid.mem.arena.slice_count == slice_count); + if (!_mi_page_init(theap, page)) { + _mi_arenas_free( page, mi_page_full_size(page), page->memid); + return NULL; + } + + return page; +} + +// Allocate a page containing one block (very large, or with large alignment) +static mi_page_t* mi_arenas_page_singleton_alloc(mi_theap_t* theap, size_t block_size, size_t block_alignment) +{ + const bool os_align = (block_alignment > MI_PAGE_MAX_OVERALLOC_ALIGN); + const size_t info_size = (os_align ? 
MI_PAGE_ALIGN : mi_page_info_size()); + #if MI_SECURE < 2 + const size_t slice_count = mi_slice_count_of_size(info_size + block_size); + #else + const size_t slice_count = mi_slice_count_of_size(_mi_align_up(info_size + block_size, _mi_os_secure_guard_page_size()) + _mi_os_secure_guard_page_size()); + #endif + + mi_page_t* page = mi_arenas_page_alloc_fresh(theap, slice_count, block_size, block_alignment, true /* commit singletons always */); + if (page == NULL) return NULL; + + mi_assert(page->reserved == 1); + if (!_mi_page_init(theap, page)) { + _mi_arenas_free( page, mi_page_full_size(page), page->memid); + return NULL; + } + + return page; +} + + +mi_page_t* _mi_arenas_page_alloc(mi_theap_t* theap, size_t block_size, size_t block_alignment) { + mi_page_t* page; + if mi_unlikely(block_alignment > MI_PAGE_MAX_OVERALLOC_ALIGN) { + mi_assert_internal(_mi_is_power_of_two(block_alignment)); + page = mi_arenas_page_singleton_alloc(theap, block_size, block_alignment); + } + else if (block_size <= MI_SMALL_MAX_OBJ_SIZE) { + page = mi_arenas_page_regular_alloc(theap, mi_slice_count_of_size(MI_SMALL_PAGE_SIZE), block_size); + } + else if (block_size <= MI_MEDIUM_MAX_OBJ_SIZE) { + page = mi_arenas_page_regular_alloc(theap, mi_slice_count_of_size(MI_MEDIUM_PAGE_SIZE), block_size); + } + #if MI_ENABLE_LARGE_PAGES + else if (block_size <= MI_LARGE_MAX_OBJ_SIZE) { + page = mi_arenas_page_regular_alloc(theap, mi_slice_count_of_size(MI_LARGE_PAGE_SIZE), block_size); + } + #endif + else { + page = mi_arenas_page_singleton_alloc(theap, block_size, block_alignment); + } + if mi_unlikely(page == NULL) { + return NULL; + } + // mi_assert_internal(page == NULL || _mi_page_segment(page)->subproc == tld->subproc); + mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); + mi_assert_internal(_mi_ptr_page(page)==page); + mi_assert_internal(_mi_ptr_page(mi_page_start(page))==page); + mi_assert_internal(block_alignment <= MI_PAGE_MAX_OVERALLOC_ALIGN || _mi_is_aligned(mi_page_start(page), block_alignment)); + + return page; +} + +void _mi_arenas_page_free(mi_page_t* page, mi_theap_t* current_theapx) { + mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); + mi_assert_internal(_mi_ptr_page(page)==page); + mi_assert_internal(mi_page_is_owned(page)); + mi_assert_internal(mi_page_all_free(page)); + mi_assert_internal(mi_page_is_abandoned(page)); + mi_assert_internal(page->next==NULL && page->prev==NULL); + mi_assert_internal(current_theapx == NULL || _mi_thread_id()==current_theapx->tld->thread_id); + + if (current_theapx != NULL) { + mi_theap_stat_decrease(current_theapx, page_bins[_mi_page_stats_bin(page)], 1); + mi_theap_stat_decrease(current_theapx, pages, 1); + } + else { + mi_heap_t* const heap = mi_page_heap(page); + mi_heap_stat_decrease(heap, page_bins[_mi_page_stats_bin(page)], 1); + mi_heap_stat_decrease(heap, pages, 1); + } + + #if MI_DEBUG>1 + if (page->memid.memkind==MI_MEM_ARENA && !mi_page_is_full(page)) { + size_t bin = _mi_bin(mi_page_block_size(page)); + size_t slice_index; + size_t slice_count; + mi_arena_pages_t* arena_pages = NULL; + mi_arena_t* const arena = mi_page_arena_pages(page, &slice_index, &slice_count, &arena_pages); + mi_assert_internal(mi_bbitmap_is_clearN(arena->slices_free, slice_index, slice_count)); + mi_assert_internal(page->slice_committed > 0 || mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count)); + mi_assert_internal(mi_bitmap_is_clearN(arena_pages->pages_abandoned[bin], slice_index, 1)); + mi_assert_internal(mi_bitmap_is_setN(arena_pages->pages, 
slice_index, 1)); + // note: we cannot check for `!mi_page_is_abandoned_and_mapped` since that may + // be (temporarily) not true if the free happens while trying to reclaim + // see `mi_arena_try_claim_abandoned` + } + #endif + + // recommit guard page at the end? + // we must do this since we may later allocate large spans over this page and cannot have a guard page in between + #if MI_SECURE >= 2 + if (!page->memid.is_pinned) { + _mi_os_secure_guard_page_reset_before((uint8_t*)page + mi_page_full_size(page), page->memid); + } + #endif + + // unregister page + _mi_page_map_unregister(page); + if (page->memid.memkind == MI_MEM_ARENA) { + mi_arena_pages_t* arena_pages; + size_t slice_index; + size_t slice_count; MI_UNUSED(slice_count); + mi_arena_t* const arena = mi_page_arena_pages(page, &slice_index, &slice_count, &arena_pages); + mi_assert_internal(arena_pages!=NULL); + mi_bitmap_clear(arena_pages->pages, slice_index); + if (page->slice_committed > 0) { + // if committed on-demand, set the commit bits to account commit properly + mi_assert_internal(mi_page_full_size(page) >= page->slice_committed); + const size_t total_slices = page->slice_committed / MI_ARENA_SLICE_SIZE; // conservative + //mi_assert_internal(mi_bitmap_is_clearN(arena->slices_committed, slice_index, total_slices)); + mi_assert_internal(slice_count >= total_slices); + if (total_slices > 0) { + mi_bitmap_setN(arena->slices_committed, slice_index, total_slices, NULL); + } + // any left over? + const size_t extra = page->slice_committed % MI_ARENA_SLICE_SIZE; + if (extra > 0) { + // pretend it was decommitted already + mi_subproc_stat_decrease(arena->subproc, committed, extra); + } + } + else { + mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count)); + } + } + _mi_arenas_free(page, mi_page_full_size(page), page->memid); +} + +/* ----------------------------------------------------------- + Arena abandon +----------------------------------------------------------- */ + +void _mi_arenas_page_abandon(mi_page_t* page, mi_theap_t* current_theap) { + mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); + mi_assert_internal(_mi_ptr_page(page)==page); + mi_assert_internal(mi_page_is_owned(page)); + mi_assert_internal(mi_page_is_abandoned(page)); + mi_assert_internal(!mi_page_all_free(page)); + mi_assert_internal(page->next==NULL && page->prev == NULL); + mi_assert_internal(_mi_thread_id()==current_theap->tld->thread_id); + // mi_assert_internal(current_theap == _mi_page_associated_theap(page)); + + mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(heap==current_theap->heap); + if (page->memid.memkind==MI_MEM_ARENA && !mi_page_is_full(page)) { + // make available for allocations + size_t bin = _mi_bin(mi_page_block_size(page)); + size_t slice_index; + size_t slice_count; + mi_arena_pages_t* arena_pages = NULL; + mi_arena_t* const arena = mi_page_arena_pages(page, &slice_index, &slice_count, &arena_pages); MI_UNUSED(arena); + + mi_assert_internal(!mi_page_is_singleton(page)); + mi_assert_internal(mi_bbitmap_is_clearN(arena->slices_free, slice_index, slice_count)); + mi_assert_internal(page->slice_committed > 0 || mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count)); + mi_assert_internal(mi_bitmap_is_setN(arena->slices_dirty, slice_index, slice_count)); + + mi_page_set_abandoned_mapped(page); + const bool was_clear = mi_bitmap_set(arena_pages->pages_abandoned[bin], slice_index); + MI_UNUSED(was_clear); mi_assert_internal(was_clear); + 
mi_atomic_increment_relaxed(&heap->abandoned_count[bin]); + mi_theap_stat_increase(current_theap, pages_abandoned, 1); + } + else { + // page is full (or a singleton), or the page is OS/externally allocated + // leave as is; it will be reclaimed when an object is free'd in the page + // but for non-arena pages, add to the subproc list so these can be visited + if (page->memid.memkind != MI_MEM_ARENA && mi_option_is_enabled(mi_option_visit_abandoned)) { + mi_lock(&heap->os_abandoned_pages_lock) { + // push in front + page->prev = NULL; + page->next = heap->os_abandoned_pages; + if (page->next != NULL) { page->next->prev = page; } + heap->os_abandoned_pages = page; + } + } + mi_theap_stat_increase(current_theap, pages_abandoned, 1); + } + mi_abandoned_page_unown(page, current_theap); +} + + +// this is called from `free.c:mi_free_try_collect_mt` only. +bool _mi_arenas_page_try_reabandon_to_mapped(mi_page_t* page) { + mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); + mi_assert_internal(_mi_ptr_page(page)==page); + mi_assert_internal(mi_page_is_owned(page)); + mi_assert_internal(mi_page_is_abandoned(page)); + mi_assert_internal(!mi_page_is_abandoned_mapped(page)); + mi_assert_internal(!mi_page_is_full(page)); + mi_assert_internal(!mi_page_all_free(page)); + mi_assert_internal(!mi_page_is_singleton(page)); + if (mi_page_is_full(page) || mi_page_is_abandoned_mapped(page) || page->memid.memkind != MI_MEM_ARENA) { + return false; + } + else { + // do not use _mi_heap_theap as we may call this during shutdown of threads and don't want to reinitialize the theap + mi_theap_t* const theap = _mi_page_associated_theap_peek(page); + if (theap == NULL) { + return false; + } + else { + mi_theap_stat_counter_increase(theap, pages_reabandon_full, 1); + mi_theap_stat_adjust_decrease(theap, pages_abandoned, 1); // adjust as we are not abandoning fresh + _mi_arenas_page_abandon(page, theap); + return true; + } + } +} + +// called from `mi_free` if trying to unabandon an abandoned page +void _mi_arenas_page_unabandon(mi_page_t* page, mi_theap_t* current_theapx) { + mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); + mi_assert_internal(_mi_ptr_page(page)==page); + mi_assert_internal(mi_page_is_owned(page)); + mi_assert_internal(mi_page_is_abandoned(page)); + mi_assert_internal(current_theapx==NULL || _mi_thread_id()==current_theapx->tld->thread_id); + + mi_heap_t* const heap = mi_page_heap(page); + if (mi_page_is_abandoned_mapped(page)) { + mi_assert_internal(page->memid.memkind==MI_MEM_ARENA); + // remove from the abandoned map + size_t bin = _mi_bin(mi_page_block_size(page)); + size_t slice_index; + size_t slice_count; + mi_arena_pages_t* arena_pages; + mi_arena_t* arena = mi_page_arena_pages(page, &slice_index, &slice_count, &arena_pages); MI_UNUSED(arena); + + mi_assert_internal(mi_bbitmap_is_clearN(arena->slices_free, slice_index, slice_count)); + mi_assert_internal(page->slice_committed > 0 || mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count)); + + // this busy waits until a concurrent reader (from alloc_abandoned) is done + mi_bitmap_clear_once_set(arena_pages->pages_abandoned[bin], slice_index); + mi_page_clear_abandoned_mapped(page); + mi_atomic_decrement_relaxed(&heap->abandoned_count[bin]); + } + else { + // page is full (or a singleton), page is OS allocated + // if not an arena page, remove from the subproc os pages list + if (page->memid.memkind != MI_MEM_ARENA && mi_option_is_enabled(mi_option_visit_abandoned)) { + mi_lock(&heap->os_abandoned_pages_lock) { + if 
(page->prev != NULL) { page->prev->next = page->next; } + if (page->next != NULL) { page->next->prev = page->prev; } + if (heap->os_abandoned_pages == page) { heap->os_abandoned_pages = page->next; } + page->next = NULL; + page->prev = NULL; + } + } + } + if (current_theapx!=NULL) { + mi_theap_stat_decrease(current_theapx, pages_abandoned, 1); + } + else { + mi_heap_stat_decrease(heap, pages_abandoned, 1); + } +} + + +/* ----------------------------------------------------------- + Arena free +----------------------------------------------------------- */ +static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_t slices); +static void mi_arenas_try_purge(bool force, bool visit_all, mi_subproc_t* subproc, size_t tseq); + +void _mi_arenas_free(void* p, size_t size, mi_memid_t memid) { + if (p==NULL) return; + if (size==0) return; + + // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.) + mi_track_mem_undefined(p, size); + + if (mi_memkind_is_os(memid.memkind)) { + // was a direct OS allocation, pass through + _mi_os_free(p, size, memid); + } + else if (memid.memkind == MI_MEM_ARENA) { + // allocated in an arena + size_t slice_count; + size_t slice_index; + mi_arena_t* arena = mi_arena_from_memid(memid, &slice_index, &slice_count); + mi_assert_internal((size%MI_ARENA_SLICE_SIZE)==0); + mi_assert_internal((slice_count*MI_ARENA_SLICE_SIZE)==size); + mi_assert_internal(mi_arena_slice_start(arena,slice_index) <= (uint8_t*)p); + mi_assert_internal(mi_arena_slice_start(arena,slice_index) + mi_size_of_slices(slice_count) > (uint8_t*)p); + // checks + if (arena == NULL) { + _mi_error_message(EINVAL, "trying to free from an invalid arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); + return; + } + mi_assert_internal(slice_index < arena->slice_count); + mi_assert_internal(slice_index >= mi_arena_info_slices(arena)); + if (slice_index < mi_arena_info_slices(arena) || slice_index > arena->slice_count) { + _mi_error_message(EINVAL, "trying to free from an invalid arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); + return; + } + + // potentially decommit + if (!arena->memid.is_pinned /* && !arena->memid.initially_committed */) { // todo: allow decommit even if initially committed? + // (delay) purge the page + mi_arena_schedule_purge(arena, slice_index, slice_count); + } + + // and make it available to others again + bool all_inuse = mi_bbitmap_setN(arena->slices_free, slice_index, slice_count); + if (!all_inuse) { + _mi_error_message(EAGAIN, "trying to free an already freed arena block: %p, size %zu\n", mi_arena_slice_start(arena,slice_index), mi_size_of_slices(slice_count)); + return; + }; + } + else if (memid.memkind == MI_MEM_META) { + _mi_meta_free(p, size, memid); + } + else { + // arena was none, external, or static; nothing to do + mi_assert_internal(mi_memid_needs_no_free(memid)); + } + + // try to purge expired decommits + // mi_arenas_try_purge(false, false, NULL); +} + +// Purge the arenas; if `force_purge` is true, amenable parts are purged even if not yet expired +void _mi_arenas_collect(bool force_purge, bool visit_all, mi_tld_t* tld) { + mi_arenas_try_purge(force_purge, visit_all, tld->subproc, tld->thread_seq); +} + + +// Is a pointer contained in the given arena area? 
+bool mi_arena_contains(mi_arena_id_t arena_id, const void* p) { + mi_arena_t* arena = _mi_arena_from_id(arena_id); + return (mi_arena_start(arena) <= (const uint8_t*)p && + mi_arena_start(arena) + mi_size_of_slices(arena->slice_count) >(const uint8_t*)p); +} + +// Is a pointer inside any of our arenas? +bool _mi_arenas_contain(const void* p) { + mi_subproc_t* subproc = _mi_subproc(); + const size_t max_arena = mi_arenas_get_count(subproc); + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &subproc->arenas[i]); + if (arena != NULL && mi_arena_contains(arena,p)) { + return true; + } + } + return false; +} + + + +/* ----------------------------------------------------------- + Remove an arena. +----------------------------------------------------------- */ + +// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` +// for dynamic libraries that are unloaded and need to release all their allocated memory. +static void mi_arenas_unsafe_destroy(mi_subproc_t* subproc) { + mi_assert_internal(subproc != NULL); + const size_t arena_count = mi_arenas_get_count(subproc); + for (size_t i = 0; i < arena_count; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &subproc->arenas[i]); + if (arena != NULL) { + // mi_lock_done(&arena->abandoned_visit_lock); + mi_atomic_store_ptr_release(mi_arena_t, &subproc->arenas[i], NULL); + if (mi_memkind_is_os(arena->memid.memkind)) { + _mi_os_free_ex(mi_arena_start(arena), mi_arena_size(arena), true, arena->memid, subproc); // pass `subproc` to avoid accessing the theap pointer (in `_mi_subproc()`) + } + } + } + // try to lower the max arena. + size_t expected = arena_count; + mi_atomic_cas_strong_acq_rel(&subproc->arena_count, &expected, 0); +} + + +// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` +// for dynamic libraries that are unloaded and need to release all their allocated memory. +void _mi_arenas_unsafe_destroy_all(mi_subproc_t* subproc) { + mi_arenas_unsafe_destroy(subproc); + // mi_arenas_try_purge(true /* force purge */, true /* visit all*/, subproc, 0 /* thread seq */); // purge non-owned arenas +} + + +/* ----------------------------------------------------------- + Add an arena. 
+----------------------------------------------------------- */ + +static bool mi_arenas_add(mi_subproc_t* subproc, mi_arena_t* arena, mi_arena_id_t* arena_id) +{ + mi_assert_internal(arena != NULL); + mi_assert_internal(arena->slice_count > 0); + if (arena_id != NULL) { *arena_id = NULL; } + + // try to find a NULL entry + mi_arena_t* expected; + size_t count = mi_arenas_get_count(subproc); + for( size_t i = 0; i < count; i++) { + if (mi_arena_from_index(subproc,i) == NULL) { + arena->arena_idx = i; + expected = NULL; + if (mi_atomic_cas_ptr_strong_release(mi_arena_t, &subproc->arenas[i], &expected, arena)) { + // success + if (arena_id != NULL) { *arena_id = arena; } + return true; + } + } + } + + // otherwise, try to allocate a fresh slot + while (count < MI_MAX_ARENAS) { + if (mi_atomic_cas_strong_acq_rel(&subproc->arena_count, &count, count+1)) { + arena->arena_idx = count; + expected = NULL; + if (mi_atomic_cas_ptr_strong_release(mi_arena_t, &subproc->arenas[count], &expected, arena)) { + mi_subproc_stat_counter_increase(arena->subproc, arena_count, 1); + if (arena_id != NULL) { *arena_id = arena; } + return true; + } + } + } + + // failed + arena->arena_idx = 0; + arena->subproc = NULL; + return false; +} + +static size_t mi_arena_pages_size(size_t slice_count, size_t* bitmap_base) { + if (slice_count == 0) slice_count = MI_BCHUNK_BITS; + mi_assert_internal((slice_count % MI_BCHUNK_BITS) == 0); + const size_t base_size = _mi_align_up(sizeof(mi_arena_pages_t), MI_BCHUNK_SIZE); + const size_t bitmaps_count = 1 + MI_ARENA_BIN_COUNT; // pages, and abandoned + const size_t bitmaps_size = bitmaps_count * mi_bitmap_size(slice_count, NULL); + const size_t size = base_size + bitmaps_size; + if (bitmap_base != NULL) *bitmap_base = base_size; + return size; +} + +static size_t mi_arena_info_slices_needed(size_t slice_count, size_t* bitmap_base) { + if (slice_count == 0) slice_count = MI_BCHUNK_BITS; + mi_assert_internal((slice_count % MI_BCHUNK_BITS) == 0); + const size_t base_size = _mi_align_up(sizeof(mi_arena_t), MI_BCHUNK_SIZE); + const size_t bitmaps_count = 4 + MI_ARENA_BIN_COUNT; // commit, dirty, purge, pages, and abandoned + const size_t bitmaps_size = bitmaps_count * mi_bitmap_size(slice_count, NULL) + mi_bbitmap_size(slice_count, NULL); // + free + const size_t size = base_size + bitmaps_size; + + const size_t os_page_size = _mi_os_page_size(); + const size_t info_size = _mi_align_up(size, os_page_size) + _mi_os_secure_guard_page_size(); + const size_t info_slices = mi_slice_count_of_size(info_size); + + if (bitmap_base != NULL) *bitmap_base = base_size; + return info_slices; +} + +static mi_bitmap_t* mi_arena_bitmap_init(size_t slice_count, uint8_t** base) { + mi_bitmap_t* bitmap = (mi_bitmap_t*)(*base); + *base = (*base) + mi_bitmap_init(bitmap, slice_count, true /* already zero */); + return bitmap; +} + +static mi_bbitmap_t* mi_arena_bbitmap_init(size_t slice_count, uint8_t** base) { + mi_bbitmap_t* bbitmap = (mi_bbitmap_t*)(*base); + *base = (*base) + mi_bbitmap_init(bbitmap, slice_count, true /* already zero */); + return bbitmap; +} + +static mi_arena_pages_t* mi_arena_pages_alloc(mi_arena_t* arena) { + const size_t slice_count = arena->slice_count; + size_t bitmap_base = 0; + const size_t size = mi_arena_pages_size(slice_count, &bitmap_base); + mi_arena_pages_t* arena_pages = (mi_arena_pages_t*)mi_heap_zalloc_aligned(mi_heap_main(), size, MI_BCHUNK_SIZE); + if (arena_pages==NULL) return NULL; + uint8_t* base = (uint8_t*)arena_pages + bitmap_base; + mi_assert_internal(_mi_is_aligned(base, MI_BCHUNK_SIZE)); + arena_pages->pages = 
mi_arena_bitmap_init(slice_count, &base); + for (size_t i = 0; i < MI_ARENA_BIN_COUNT; i++) { + arena_pages->pages_abandoned[i] = mi_arena_bitmap_init(slice_count, &base); + } + return arena_pages; +} + + +static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t size, int numa_node, bool exclusive, + mi_memid_t memid, mi_commit_fun_t* commit_fun, void* commit_fun_arg, mi_arena_id_t* arena_id) mi_attr_noexcept +{ + mi_assert(_mi_is_aligned(start,MI_ARENA_SLICE_SIZE)); + mi_assert(start!=NULL); + if (arena_id != NULL) { *arena_id = _mi_arena_id_none(); } + if (start==NULL) return false; + if (!_mi_is_aligned(start,MI_ARENA_SLICE_SIZE)) { + // we can align the start since the memid tracks the real base of the memory. + void* const aligned_start = _mi_align_up_ptr(start, MI_ARENA_SLICE_SIZE); + const size_t diff = (uint8_t*)aligned_start - (uint8_t*)start; + if (diff >= size || (size - diff) < MI_ARENA_SLICE_SIZE) { + _mi_warning_message("after alignment, the size of the arena becomes too small (memory at %p with size %zu)\n", start, size); + return false; + } + start = aligned_start; + size = size - diff; + } + + const size_t slice_count = _mi_align_down(size / MI_ARENA_SLICE_SIZE, MI_BCHUNK_BITS); + if (slice_count > MI_BITMAP_MAX_BIT_COUNT) { // 16 GiB for now + // todo: allow larger areas (either by splitting it up in arena's or having larger arena's) + _mi_warning_message("cannot use OS memory since it is too large (size %zu MiB, maximum is %zu MiB)", size/MI_MiB, mi_size_of_slices(MI_BITMAP_MAX_BIT_COUNT)/MI_MiB); + return false; + } + size_t bitmap_base; + const size_t info_slices = mi_arena_info_slices_needed(slice_count, &bitmap_base); + if (slice_count < info_slices+1) { + _mi_warning_message("cannot use OS memory since it is not large enough (size %zu KiB, minimum required is %zu KiB)", size/MI_KiB, mi_size_of_slices(info_slices+1)/MI_KiB); + return false; + } + else if (info_slices >= MI_ARENA_MAX_CHUNK_OBJ_SLICES) { + _mi_warning_message("cannot use OS memory since it is too large with respect to the maximum object size (size %zu MiB, meta-info slices %zu, maximum object slices are %zu)", size/MI_MiB, info_slices, MI_ARENA_MAX_CHUNK_OBJ_SLICES); + return false; + } + + mi_arena_t* arena = (mi_arena_t*)start; + + // commit & zero if needed + if (!memid.initially_committed) { + size_t commit_size = mi_size_of_slices(info_slices); + // leave a guard OS page decommitted at the end? + if (!memid.is_pinned) { commit_size -= _mi_os_secure_guard_page_size(); } + bool ok = false; + if (commit_fun != NULL) { + ok = (*commit_fun)(true /* commit */, arena, commit_size, NULL, commit_fun_arg); + } + else { + ok = _mi_os_commit(arena, commit_size, NULL); + } + if (!ok) { + _mi_warning_message("unable to commit meta-data for OS memory"); + return false; + } + } + else if (!memid.is_pinned) { + // if MI_SECURE, set a guard page at the end + // todo: this does not respect the commit_fun as the memid is of external memory + _mi_os_secure_guard_page_set_before((uint8_t*)arena + mi_size_of_slices(info_slices), memid); + } + if (!memid.initially_zero) { + _mi_memzero(arena, mi_size_of_slices(info_slices) - _mi_os_secure_guard_page_size()); + } + + // init + arena->subproc = subproc; + arena->memid = memid; + arena->is_exclusive = exclusive; + arena->slice_count = slice_count; + arena->info_slices = info_slices; + arena->numa_node = numa_node; // TODO: or get the current numa node if -1? 
(now it allows anyone to allocate on -1) + arena->purge_expire = 0; + arena->commit_fun = commit_fun; + arena->commit_fun_arg = commit_fun_arg; + // mi_lock_init(&arena->abandoned_visit_lock); + + // init bitmaps + uint8_t* base = mi_arena_start(arena) + bitmap_base; + arena->slices_free = mi_arena_bbitmap_init(slice_count,&base); + arena->slices_committed = mi_arena_bitmap_init(slice_count,&base); + arena->slices_dirty = mi_arena_bitmap_init(slice_count,&base); + arena->slices_purge = mi_arena_bitmap_init(slice_count, &base); + arena->pages_main.pages = mi_arena_bitmap_init(slice_count, &base); + for( size_t i = 0; i < MI_ARENA_BIN_COUNT; i++) { + arena->pages_main.pages_abandoned[i] = mi_arena_bitmap_init(slice_count,&base); + } + mi_assert_internal(mi_size_of_slices(info_slices) >= (size_t)(base - mi_arena_start(arena))); + + // reserve our meta info (and reserve slices outside the memory area) + mi_bbitmap_unsafe_setN(arena->slices_free, info_slices /* start */, arena->slice_count - info_slices); + if (memid.initially_committed) { + mi_bitmap_unsafe_setN(arena->slices_committed, 0, arena->slice_count); + } + else { + mi_bitmap_setN(arena->slices_committed, 0, info_slices, NULL); + } + if (!memid.initially_zero) { + mi_bitmap_unsafe_setN(arena->slices_dirty, 0, arena->slice_count); + } + else { + mi_bitmap_setN(arena->slices_dirty, 0, info_slices, NULL); + } + + return mi_arenas_add(subproc, arena, arena_id); +} + + +bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_pinned, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { + mi_memid_t memid = _mi_memid_create(MI_MEM_EXTERNAL); + memid.mem.os.base = start; + memid.mem.os.size = size; + memid.initially_committed = is_committed; + memid.initially_zero = is_zero; + memid.is_pinned = is_pinned; + return mi_manage_os_memory_ex2(_mi_subproc(), start, size, numa_node, exclusive, memid, NULL, NULL, arena_id); +} + +bool mi_manage_memory(void* start, size_t size, bool is_committed, bool is_zero, bool is_pinned, int numa_node, bool exclusive, mi_commit_fun_t* commit_fun, void* commit_fun_arg, mi_arena_id_t* arena_id) mi_attr_noexcept +{ + mi_memid_t memid = _mi_memid_create(MI_MEM_EXTERNAL); + memid.mem.os.base = start; + memid.mem.os.size = size; + memid.initially_committed = is_committed; + memid.initially_zero = is_zero; + memid.is_pinned = is_pinned; + return mi_manage_os_memory_ex2(_mi_subproc(), start, size, numa_node, exclusive, memid, commit_fun, commit_fun_arg, arena_id); +} + + +// Reserve a range of regular OS memory +static int mi_reserve_os_memory_ex2(mi_subproc_t* subproc, size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) { + if (arena_id != NULL) *arena_id = _mi_arena_id_none(); + size = _mi_align_up(size, MI_ARENA_SLICE_SIZE); // at least one slice + mi_memid_t memid; + void* start = _mi_os_alloc_aligned(size, MI_ARENA_SLICE_ALIGN, commit, allow_large, &memid); + if (start == NULL) return ENOMEM; + if (!mi_manage_os_memory_ex2(subproc, start, size, -1 /* numa node */, exclusive, memid, NULL, NULL, arena_id)) { + _mi_os_free_ex(start, size, commit, memid, NULL); + _mi_verbose_message("failed to reserve %zu KiB memory\n", _mi_divide_up(size, 1024)); + return ENOMEM; + } + _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size, 1024), memid.is_pinned ? 
" (in large os pages)" : ""); + // mi_debug_show_arenas(true, true, false); + + return 0; +} + +// Reserve a range of regular OS memory +int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { + return mi_reserve_os_memory_ex2(_mi_subproc(), size, commit, allow_large, exclusive, arena_id); +} + +// Manage a range of regular OS memory +bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept { + return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false /* exclusive? */, NULL); +} + +// Reserve a range of regular OS memory +int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept { + return mi_reserve_os_memory_ex(size, commit, allow_large, false, NULL); +} + + +/* ----------------------------------------------------------- + Debugging +----------------------------------------------------------- */ + +// Return idx of the slice past the last used slice +static size_t mi_arena_used_slices(mi_arena_t* arena) { + size_t idx; + if (mi_bitmap_bsr(arena->slices_dirty, &idx)) { + return (idx + 1); + } + else { + return mi_arena_info_slices(arena); + } +} + +static size_t mi_debug_show_bfield(mi_bfield_t field, char* buf, size_t* k) { + size_t bit_set_count = 0; + for (int bit = 0; bit < MI_BFIELD_BITS; bit++) { + bool is_set = ((((mi_bfield_t)1 << bit) & field) != 0); + if (is_set) bit_set_count++; + buf[*k] = (is_set ? 'x' : '.'); + *k = *k + 1; + } + return bit_set_count; +} + +typedef enum mi_ansi_color_e { + MI_BLACK = 30, + MI_MAROON, + MI_DARKGREEN, + MI_ORANGE, + MI_NAVY, + MI_PURPLE, + MI_TEAL, + MI_GRAY, + MI_DARKGRAY = 90, + MI_RED, + MI_GREEN, + MI_YELLOW, + MI_BLUE, + MI_MAGENTA, + MI_CYAN, + MI_WHITE +} mi_ansi_color_t; + +static void mi_debug_color(char* buf, size_t* k, mi_ansi_color_t color) { + *k += _mi_snprintf(buf + *k, 32, "\x1B[%dm", (int)color); +} + +static int mi_page_commit_usage(mi_page_t* page) { + // if (mi_page_size(page) <= MI_PAGE_MIN_COMMIT_SIZE) return 100; + const size_t committed_size = mi_page_committed(page); + const size_t used_size = page->used * mi_page_block_size(page); + return (int)(used_size * 100 / committed_size); +} + +static size_t mi_debug_show_page_bfield(char* buf, size_t* k, mi_arena_t* arena, size_t slice_index, long* pbit_of_page, mi_ansi_color_t* pcolor_of_page ) { + size_t bit_set_count = 0; + long bit_of_page = *pbit_of_page; + mi_ansi_color_t color = *pcolor_of_page; + mi_ansi_color_t prev_color = MI_GRAY; + for (int bit = 0; bit < MI_BFIELD_BITS; bit++, bit_of_page--) { + // bool is_set = ((((mi_bfield_t)1 << bit) & field) != 0); + void* start = mi_arena_slice_start(arena, slice_index + bit); + mi_page_t* page = _mi_safe_ptr_page(start); + char c = ' '; + if (start==page) { + mi_assert_internal(bit_of_page <= 0); + bit_set_count++; + c = 'p'; + color = MI_GRAY; + if (mi_page_is_singleton(page)) { c = 's'; } + else if (mi_page_is_full(page)) { c = 'f'; } + if (!mi_page_is_abandoned(page)) { c = _mi_toupper(c); } + int commit_usage = mi_page_commit_usage(page); + if (commit_usage < 25) { color = MI_MAROON; } + else if (commit_usage < 50) { color = MI_ORANGE; } + else if (commit_usage < 75) { color = MI_TEAL; } + else color = MI_DARKGREEN; + bit_of_page = (long)page->memid.mem.arena.slice_count; + } + else { + c = '?'; + if (bit_of_page > 0) { c = '-'; } + else if (_mi_meta_is_meta_page(start)) { c = 'm'; color = MI_GRAY; } + else if 
(slice_index + bit < arena->info_slices) { c = 'i'; color = MI_GRAY; } + // else if (mi_bitmap_is_setN(arena->pages_purge, slice_index + bit, NULL)) { c = '*'; } + else if (mi_bbitmap_is_setN(arena->slices_free, slice_index+bit,1)) { + if (mi_bitmap_is_set(arena->slices_purge, slice_index + bit)) { c = '~'; color = MI_ORANGE; } + else if (mi_bitmap_is_setN(arena->slices_committed, slice_index + bit, 1)) { c = '_'; color = MI_GRAY; } + else { c = '.'; color = MI_GRAY; } + } + if (bit==MI_BFIELD_BITS-1 && bit_of_page > 1) { c = '>'; } + } + if (color != prev_color) { + mi_debug_color(buf, k, color); + prev_color = color; + } + buf[*k] = c; *k += 1; + } + mi_debug_color(buf, k, MI_GRAY); + *pbit_of_page = bit_of_page; + *pcolor_of_page = color; + return bit_set_count; +} + +static size_t mi_debug_show_chunks(const char* header1, const char* header2, const char* header3, + size_t slice_count, size_t chunk_count, + mi_bchunk_t* chunks, mi_bchunkmap_t* chunk_bins, bool invert, mi_arena_t* arena, bool narrow) +{ + _mi_raw_message("\x1B[37m%s%s%s (use/commit: \x1B[31m0 - 25%%\x1B[33m - 50%%\x1B[36m - 75%%\x1B[32m - 100%%\x1B[0m)\n", header1, header2, header3); + const size_t fields_per_line = (narrow ? 2 : 4); + const size_t used_slice_count = mi_arena_used_slices(arena); + size_t bit_count = 0; + size_t bit_set_count = 0; + long bit_of_page = 0; + mi_ansi_color_t color_of_page = MI_GRAY; + for (size_t i = 0; i < chunk_count && bit_count < slice_count; i++) { + char buf[5*MI_BCHUNK_BITS + 64]; _mi_memzero(buf, sizeof(buf)); + if (bit_count > used_slice_count && i+2 < chunk_count) { + const size_t diff = chunk_count - 1 - i; + bit_count += diff*MI_BCHUNK_BITS; + _mi_raw_message(" |\n"); + i = chunk_count-1; + } + + size_t k = 0; + + if (i<10) { buf[k++] = ('0' + (char)i); buf[k++] = ' '; buf[k++] = ' '; } + else if (i<100) { buf[k++] = ('0' + (char)(i/10)); buf[k++] = ('0' + (char)(i%10)); buf[k++] = ' '; } + else if (i<1000) { buf[k++] = ('0' + (char)(i/100)); buf[k++] = ('0' + (char)((i%100)/10)); buf[k++] = ('0' + (char)(i%10)); } + + char chunk_kind = ' '; + if (chunk_bins != NULL) { + switch (mi_bbitmap_debug_get_bin(chunk_bins,i)) { + case MI_CBIN_SMALL: chunk_kind = 'S'; break; + case MI_CBIN_MEDIUM: chunk_kind = 'M'; break; + case MI_CBIN_LARGE: chunk_kind = 'L'; break; + case MI_CBIN_HUGE: chunk_kind = 'H'; break; + case MI_CBIN_OTHER: chunk_kind = 'X'; break; + default: chunk_kind = ' '; break; // suppress warning + // case MI_CBIN_NONE: chunk_kind = 'N'; break; + } + } + buf[k++] = chunk_kind; + buf[k++] = ' '; + + for (size_t j = 0; j < MI_BCHUNK_FIELDS; j++) { + if (j > 0 && (j % fields_per_line) == 0) { + // buf[k++] = '\n'; _mi_memset(buf+k,' ',7); k += 7; + _mi_raw_message(" %s\n\x1B[37m", buf); + _mi_memzero(buf, sizeof(buf)); + _mi_memset(buf, ' ', 5); k = 5; + } + if (bit_count < slice_count) { + mi_bfield_t bfield = 0; + if (chunks!=NULL) { + bfield = chunks[i].bfields[j]; + } + if (invert) bfield = ~bfield; + size_t xcount = (chunks==NULL ? 
mi_debug_show_page_bfield(buf, &k, arena, bit_count, &bit_of_page, &color_of_page) + : mi_debug_show_bfield(bfield, buf, &k)); + if (invert) xcount = MI_BFIELD_BITS - xcount; + bit_set_count += xcount; + buf[k++] = ' '; + } + else { + _mi_memset(buf + k, 'o', MI_BFIELD_BITS); + k += MI_BFIELD_BITS; + } + bit_count += MI_BFIELD_BITS; + } + _mi_raw_message(" %s\n\x1B[37m", buf); + } + _mi_raw_message("\x1B[0m total pages: %zu\n", bit_set_count); + return bit_set_count; +} + +//static size_t mi_debug_show_bitmap_binned(const char* header1, const char* header2, const char* header3, size_t slice_count, +// mi_bitmap_t* bitmap, mi_bchunkmap_t* chunk_bins, bool invert, mi_arena_t* arena, bool narrow) { +// return mi_debug_show_chunks(header1, header2, header3, slice_count, mi_bitmap_chunk_count(bitmap), &bitmap->chunks[0], chunk_bins, invert, arena, narrow); +//} + +static void mi_debug_show_arenas_ex(mi_heap_t* heap, bool show_pages, bool narrow) mi_attr_noexcept { + mi_subproc_t* subproc = heap->subproc; + size_t max_arenas = mi_arenas_get_count(subproc); + //size_t free_total = 0; + //size_t slice_total = 0; + //size_t abandoned_total = 0; + size_t page_total = 0; + for (size_t i = 0; i < max_arenas; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &subproc->arenas[i]); + if (arena == NULL) break; + mi_assert(arena->subproc == subproc); + // slice_total += arena->slice_count; + _mi_raw_message("arena %zu at %p: %zu slices (%zu MiB)%s, subproc: %p\n", i, arena, arena->slice_count, (size_t)(mi_size_of_slices(arena->slice_count)/MI_MiB), (arena->memid.is_pinned ? ", pinned" : ""), arena->subproc); + //if (show_inuse) { + // free_total += mi_debug_show_bbitmap("in-use slices", arena->slice_count, arena->slices_free, true, NULL); + //} + //if (show_committed) { + // mi_debug_show_bitmap("committed slices", arena->slice_count, arena->slices_committed, false, NULL); + //} + // todo: abandoned slices + //if (show_purge) { + // purge_total += mi_debug_show_bitmap("purgeable slices", arena->slice_count, arena->slices_purge, false, NULL); + //} + if (show_pages) { + // mi_arena_pages_t* arena_pages = mi_heap_arena_pages(heap, arena); + // if (arena_pages != NULL) + { + const char* header1 = "chunks (p:page, f:full, s:singleton, P,F,S:not abandoned, i:arena-info, m:meta-data, ~:free-purgable, _:free-committed, .:free-reserved)"; + const char* header2 = (narrow ? "\n " : " "); + const char* header3 = "(chunk bin: S:small, M : medium, L : large, X : other)"; + page_total += mi_debug_show_chunks(header1, header2, header3, arena->slice_count, + mi_bbitmap_chunk_count(arena->slices_free), NULL, + arena->slices_free->chunkmap_bins, false, arena, narrow); + } + } + } + // if (show_inuse) _mi_raw_message("total inuse slices : %zu\n", slice_total - free_total); + // if (show_abandoned) _mi_raw_message("total abandoned slices: %zu\n", abandoned_total); + if (show_pages) _mi_raw_message("total pages in arenas: %zu\n", page_total); +} + +void mi_debug_show_arenas(void) mi_attr_noexcept { + mi_debug_show_arenas_ex(mi_heap_main(), true /* show pages */, true /* narrow? */); +} + +void mi_arenas_print(void) mi_attr_noexcept { + mi_debug_show_arenas(); +} + + +/* ----------------------------------------------------------- + Reserve a huge page arena. 
+----------------------------------------------------------- */ +// reserve at a specific numa node +int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { + if (arena_id != NULL) *arena_id = NULL; + if (pages==0) return 0; + if (numa_node < -1) numa_node = -1; + if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); + size_t hsize = 0; + size_t pages_reserved = 0; + mi_memid_t memid; + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize, &memid); + if (p==NULL || pages_reserved==0) { + _mi_warning_message("failed to reserve %zu GiB huge pages\n", pages); + return ENOMEM; + } + _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); + + if (!mi_manage_os_memory_ex2(_mi_subproc(), p, hsize, numa_node, exclusive, memid, NULL, NULL, arena_id)) { + _mi_os_free(p, hsize, memid); + return ENOMEM; + } + return 0; +} + +int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept { + return mi_reserve_huge_os_pages_at_ex(pages, numa_node, timeout_msecs, false, NULL); +} + +// reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected) +int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept { + if (pages == 0) return 0; + + // pages per numa node + int numa_count = (numa_nodes > 0 && numa_nodes <= INT_MAX ? (int)numa_nodes : _mi_os_numa_node_count()); + if (numa_count <= 0) { numa_count = 1; } + const size_t pages_per = pages / numa_count; + const size_t pages_mod = pages % numa_count; + const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50); + + // reserve evenly among numa nodes + for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { + size_t node_pages = pages_per; // can be 0 + if ((size_t)numa_node < pages_mod) { node_pages++; } + int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per); + if (err) return err; + if (pages < node_pages) { + pages = 0; + } + else { + pages -= node_pages; + } + } + + return 0; +} + +int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { + MI_UNUSED(max_secs); + _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); + if (pages_reserved != NULL) *pages_reserved = 0; + int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0)); + if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; + return err; +} + + + + + +/* ----------------------------------------------------------- + Arena purge +----------------------------------------------------------- */ + +static long mi_arena_purge_delay(void) { + // <0 = no purging allowed, 0=immediate purging, >0=milli-second delay + return (mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult)); +} + +// reset or decommit in an arena and update the commit bitmap +// assumes we own the area (i.e. 
slices_free is claimed by us) +// returns if the memory is no longer committed (versus reset which keeps the commit) +static bool mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slice_count) { + mi_assert_internal(!arena->memid.is_pinned); + mi_assert_internal(mi_bbitmap_is_clearN(arena->slices_free, slice_index, slice_count)); + + const size_t size = mi_size_of_slices(slice_count); + void* const p = mi_arena_slice_start(arena, slice_index); + //const bool all_committed = mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count); + size_t already_committed; + mi_bitmap_setN(arena->slices_committed, slice_index, slice_count, &already_committed); // pretend all committed.. (as we lack a clearN call that counts the already set bits..) + const bool all_committed = (already_committed == slice_count); + const bool needs_recommit = _mi_os_purge_ex(p, size, all_committed /* allow reset? */, mi_size_of_slices(already_committed), arena->commit_fun, arena->commit_fun_arg); + + if (needs_recommit) { + // no longer committed + mi_bitmap_clearN(arena->slices_committed, slice_index, slice_count); + // we just counted in the purge to decommit all, but the some part was not committed so adjust that here + // mi_subproc_stat_decrease(arena->subproc, committed, mi_size_of_slices(slice_count - already_committed)); + } + else if (!all_committed) { + // we cannot assume any of these are committed any longer (even with reset since we did setN and may have marked uncommitted slices as committed) + mi_bitmap_clearN(arena->slices_committed, slice_index, slice_count); + // we adjust the commit count as parts will be re-committed + // mi_subproc_stat_decrease(arena->subproc, committed, mi_size_of_slices(already_committed)); + } + + return needs_recommit; +} + + +// Schedule a purge. This is usually delayed to avoid repeated decommit/commit calls. +// Note: assumes we (still) own the area as we may purge immediately +static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_t slice_count) { + const long delay = mi_arena_purge_delay(); + if (arena->memid.is_pinned || delay < 0 || _mi_preloading()) return; // is purging allowed at all? 
+ + mi_assert_internal(mi_bbitmap_is_clearN(arena->slices_free, slice_index, slice_count)); + if (delay == 0) { + // purge directly + mi_arena_purge(arena, slice_index, slice_count); + } + else { + // schedule purge + const mi_msecs_t expire = _mi_clock_now() + delay; + mi_msecs_t expire0 = 0; + if (mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire0, expire)) { + // expiration was not yet set + // maybe set the global arenas expire as well (if it wasn't set already) + mi_assert_internal(expire0==0); + mi_atomic_casi64_strong_acq_rel(&arena->subproc->purge_expire, &expire0, expire); + } + else { + // already an expiration was set + } + mi_bitmap_setN(arena->slices_purge, slice_index, slice_count, NULL); + } +} + +typedef struct mi_purge_visit_info_s { + mi_msecs_t now; + mi_msecs_t delay; + bool all_purged; + bool any_purged; +} mi_purge_visit_info_t; + +static bool mi_arena_try_purge_range(mi_arena_t* arena, size_t slice_index, size_t slice_count) { + mi_assert(slice_count < MI_BCHUNK_BITS); + if (mi_bbitmap_try_clearNC(arena->slices_free, slice_index, slice_count)) { + // purge + bool decommitted = mi_arena_purge(arena, slice_index, slice_count); MI_UNUSED(decommitted); + mi_assert_internal(!decommitted || mi_bitmap_is_clearN(arena->slices_committed, slice_index, slice_count)); + // and reset the free range + mi_bbitmap_setN(arena->slices_free, slice_index, slice_count); + return true; + } + else { + // was allocated again already + return false; + } +} + +static bool mi_arena_try_purge_visitor(size_t slice_index, size_t slice_count, mi_arena_t* arena, void* arg) { + mi_purge_visit_info_t* vinfo = (mi_purge_visit_info_t*)arg; + // try to purge: first claim the free blocks + if (mi_arena_try_purge_range(arena, slice_index, slice_count)) { + vinfo->any_purged = true; + vinfo->all_purged = true; + } + else if (slice_count > 1) + { + // failed to claim the full range, try per slice instead + for (size_t i = 0; i < slice_count; i++) { + const bool purged = mi_arena_try_purge_range(arena, slice_index + i, 1); + vinfo->any_purged = vinfo->any_purged || purged; + vinfo->all_purged = vinfo->all_purged && purged; + } + } + // don't clear the purge bits as that is done atomically be the _bitmap_forall_set_ranges + // mi_bitmap_clearN(arena->slices_purge, slice_index, slice_count); + return true; // continue +} + +// returns true if anything was purged +static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) +{ + // check pre-conditions + if (arena->memid.is_pinned) return false; + + // expired yet? 
+ mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); + if (!force && (expire == 0 || expire > now)) return false; + + // reset expire + mi_atomic_storei64_release(&arena->purge_expire, (mi_msecs_t)0); + mi_subproc_stat_counter_increase(arena->subproc, arena_purges, 1); + + // go through all purge info's (with max MI_BFIELD_BITS ranges at a time) + // this also clears those ranges atomically (so any newly freed blocks will get purged next + // time around) + mi_purge_visit_info_t vinfo = { now, mi_arena_purge_delay(), true /*all?*/, false /*any?*/}; + + // we purge by at least `minslices` to not fragment transparent huge pages for example + const size_t minslices = mi_slice_count_of_size(_mi_os_minimal_purge_size()); + _mi_bitmap_forall_setc_rangesn(arena->slices_purge, minslices, &mi_arena_try_purge_visitor, arena, &vinfo); + + return vinfo.any_purged; +} + + +static void mi_arenas_try_purge(bool force, bool visit_all, mi_subproc_t* subproc, size_t tseq) +{ + // try purge can be called often so try to only run when needed + const long delay = mi_arena_purge_delay(); + if (_mi_preloading() || delay <= 0) return; // nothing will be scheduled + + // check if any arena needs purging? + const mi_msecs_t now = _mi_clock_now(); + const mi_msecs_t arenas_expire = mi_atomic_loadi64_acquire(&subproc->purge_expire); + if (!visit_all && !force && (arenas_expire == 0 || arenas_expire > now)) return; + + const size_t max_arena = mi_arenas_get_count(subproc); + if (max_arena == 0) return; + + // allow only one thread to purge at a time (todo: allow concurrent purging?) + static mi_atomic_guard_t purge_guard; + mi_atomic_guard(&purge_guard) + { + // increase global expire: at most one purge per delay cycle + if (arenas_expire > now) { mi_atomic_storei64_release(&subproc->purge_expire, now + (delay/10)); } + const size_t arena_start = tseq % max_arena; + size_t max_purge_count = (visit_all ? 
max_arena : (max_arena/4)+1); + bool all_visited = true; + bool any_purged = false; + for (size_t _i = 0; _i < max_arena; _i++) { + size_t i = _i + arena_start; + if (i >= max_arena) { i -= max_arena; } + mi_arena_t* arena = mi_arena_from_index(subproc,i); + if (arena != NULL) { + if (mi_arena_try_purge(arena, now, force)) { + any_purged = true; + if (max_purge_count <= 1) { + all_visited = false; + break; + } + max_purge_count--; + } + } + } + if (all_visited && !any_purged) { + mi_atomic_storei64_release(&subproc->purge_expire, 0); + } + } +} + + +/* ----------------------------------------------------------- + Visit all pages and blocks in a heap +----------------------------------------------------------- */ + +typedef struct mi_heap_visit_info_s { + mi_heap_t* heap; + mi_block_visit_fun* visitor; + void* arg; + bool visit_blocks; +} mi_heap_visit_info_t; + +static bool mi_heap_visit_page(mi_page_t* page, mi_heap_visit_info_t* vinfo) { + mi_heap_area_t area; + _mi_heap_area_init(&area, page); + mi_assert_internal(vinfo->heap == mi_page_heap(page)); + if (!vinfo->visitor(vinfo->heap, &area, NULL, area.block_size, vinfo->arg)) { + return false; + } + if (vinfo->visit_blocks) { + return _mi_theap_area_visit_blocks(&area, page, vinfo->visitor, vinfo->arg); + } + else { + return true; + } +} + +static bool mi_heap_visit_page_at(size_t slice_index, size_t slice_count, mi_arena_t* arena, void* arg) { + MI_UNUSED(slice_count); + mi_heap_visit_info_t* vinfo = (mi_heap_visit_info_t*)arg; + mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index); + return mi_heap_visit_page(page, vinfo); +} + +bool _mi_heap_visit_blocks(mi_heap_t* heap, bool abandoned_only, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) { + mi_assert(visitor!=NULL); + if (visitor==NULL) return false; + if (heap==NULL) { heap = mi_heap_main(); } + // visit all pages in a heap + // we don't have to claim because we assume we are the only thread running (with this heap). + // (but we could atomically claim as well by first doing abandoned_reclaim and afterwards reabandoning). + mi_heap_visit_info_t visit_info = { heap, visitor, arg, visit_blocks }; + bool ok = true; + mi_forall_arenas(heap, NULL, 0, arena) { + mi_arena_pages_t* arena_pages = mi_heap_arena_pages(heap, arena); + if (ok && arena_pages != NULL) { + if (abandoned_only) { + for (size_t bin = 0; ok && bin < MI_BIN_COUNT; bin++) { + // todo: if we had a single abandoned page map as well, this can be faster. 
+ if (mi_atomic_load_relaxed(&heap->abandoned_count[bin]) > 0) { + ok = _mi_bitmap_forall_set(arena_pages->pages_abandoned[bin], &mi_heap_visit_page_at, arena, &visit_info); + } + } + } + else { + ok = _mi_bitmap_forall_set(arena_pages->pages, &mi_heap_visit_page_at, arena, &visit_info); + } + } + } + mi_forall_arenas_end(); + if (!ok) return false; + + // visit abandoned pages in OS allocated memory + // (technically we don't need the initial lock as we assume we are the only thread running in this subproc) + mi_page_t* page = NULL; + mi_lock(&heap->os_abandoned_pages_lock) { + page = heap->os_abandoned_pages; + } + while (ok && page != NULL) { + mi_page_t* next = page->next; // read upfront in case the visitor frees the page + ok = mi_heap_visit_page(page, &visit_info); + page = next; + } + + return ok; +} + +bool mi_heap_visit_blocks(mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) { + return _mi_heap_visit_blocks(heap, false, visit_blocks, visitor, arg); +} + +bool mi_heap_visit_abandoned_blocks(mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) { + return _mi_heap_visit_blocks(heap, true, visit_blocks, visitor, arg); +} + + +typedef struct mi_heap_delete_visit_info_s { + mi_heap_t* heap_target; + mi_theap_t* theap_target; + mi_theap_t* theap; +} mi_heap_delete_visit_info_t; + +static bool mi_heap_delete_page(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg) { + MI_UNUSED(block); MI_UNUSED(block_size); MI_UNUSED(heap); + mi_heap_delete_visit_info_t* info = (mi_heap_delete_visit_info_t*)arg; + mi_heap_t* heap_target = info->heap_target; + mi_theap_t* const theap = info->theap; mi_assert_internal(theap->heap == heap); + mi_page_t* const page = (mi_page_t*)area->reserved1; + + mi_page_claim_ownership(page); // claim ownership + if (mi_page_is_abandoned(page)) { + _mi_arenas_page_unabandon(page,theap); + } + else { + page->next = page->prev = NULL; // yikes.. better not to try to access this from a thread later on.. 
+ mi_page_set_theap(page,NULL); // set threadid to abandoned + } + mi_assert_internal(mi_page_is_abandoned(page)); + mi_assert_internal(mi_page_is_owned(page)); + + if (page->used==0) { + // free the page + _mi_arenas_page_free(page, theap); + } + else if (heap_target==NULL) { + // destroy the page + page->used=0; // note: invariant `|local_free| + |free| == reserved - used` does not hold in this case + _mi_arenas_page_free(page, theap); + } + else { + // move the page to `heap_target` as an abandoned page + // first remove it from the current heap + const size_t sbin = _mi_page_stats_bin(page); + size_t slice_index; + size_t slice_count; + mi_arena_pages_t* arena_pages = NULL; + mi_arena_t* const arena = mi_page_arena_pages(page, &slice_index, &slice_count, &arena_pages); + mi_assert_internal(mi_bitmap_is_set(arena_pages->pages, slice_index)); + mi_bitmap_clear(arena_pages->pages, slice_index); + mi_theap_stat_decrease(theap, page_bins[sbin], 1); + mi_theap_stat_decrease(theap, pages, 1); + mi_theap_t* theap_target = info->theap_target; + + // and then add it to the new target heap + mi_arena_pages_t* arena_pages_target = mi_heap_ensure_arena_pages(heap_target, arena); + if mi_unlikely(arena_pages_target==NULL) { + // if we cannot allocate this, we move it to the main heap instead (which does not require allocation) + heap_target = mi_heap_main(); + theap_target = mi_heap_theap(heap_target); + arena_pages_target = mi_heap_ensure_arena_pages(heap_target, arena); + mi_assert_internal(arena_pages_target!=NULL); + } + mi_assert_internal(mi_bitmap_is_clear(arena_pages_target->pages, slice_index)); + mi_bitmap_set(arena_pages_target->pages, slice_index); + page->heap = heap_target; + mi_theap_stat_increase(theap_target, page_bins[sbin], 1); + mi_theap_stat_increase(theap_target, pages, 1); + + // and abandon in the new heap + _mi_arenas_page_abandon(page,theap_target); + } + return true; +} + +static void mi_heap_delete_pages(mi_heap_t* heap, mi_heap_t* heap_target) { + mi_theap_t* const theap_target = (heap_target != NULL ? _mi_heap_theap(heap_target) : NULL); + mi_theap_t* const theap = _mi_heap_theap(heap); + mi_heap_delete_visit_info_t info = { heap_target, theap_target, theap }; + _mi_heap_visit_blocks(heap, false, false, &mi_heap_delete_page, &info); + #if MI_DEBUG>1 + // no more arena pages? + for (size_t i = 0; i < MI_ARENA_BIN_COUNT; i++) { + mi_arena_pages_t* const arena_pages = mi_atomic_load_relaxed(&heap->arena_pages[i]); + if (arena_pages!=NULL) { + mi_assert_internal(mi_bitmap_is_all_clear(arena_pages->pages)); + } + } + // nor os abandoned pages? + mi_lock(&heap->os_abandoned_pages_lock) { + + mi_assert_internal(heap->os_abandoned_pages == NULL); + } + // nor arena abandoned pages? + for (size_t i = 0; i < MI_BIN_COUNT; i++) { + mi_assert_internal(mi_atomic_load_relaxed(&heap->abandoned_count[i])==0); + } + #endif +} + +void _mi_heap_move_pages(mi_heap_t* heap_from, mi_heap_t* heap_to) { + if (_mi_is_heap_main(heap_from)) return; + if (heap_to==NULL) { heap_to = mi_heap_main(); } + mi_heap_delete_pages(heap_from, heap_to); +} + +void _mi_heap_destroy_pages(mi_heap_t* heap_from) { + if (_mi_is_heap_main(heap_from)) return; + mi_heap_delete_pages(heap_from, NULL); +} + +/* ----------------------------------------------------------- + Unloading and reloading an arena. 
+----------------------------------------------------------- */ +/* +static bool mi_arena_page_register(size_t slice_index, size_t slice_count, mi_arena_t* arena, void* arg) { + MI_UNUSED(arg); MI_UNUSED(slice_count); + mi_assert_internal(slice_count == 1); + mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index); + mi_assert_internal(mi_bitmap_is_setN(page->memid.mem.arena.arena->pages, page->memid.mem.arena.slice_index, 1)); + if (!_mi_page_map_register(page)) return false; // break + mi_assert_internal(_mi_ptr_page(page)==page); + return true; +} + +mi_decl_nodiscard static bool mi_arena_pages_reregister(mi_arena_t* arena) { + return _mi_bitmap_forall_set(arena->pages, &mi_arena_page_register, arena, NULL); +} + +mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* accessed_size, size_t* full_size) { + mi_arena_t* arena = _mi_arena_from_id(arena_id); + if (arena==NULL) { + return false; + } + else if (!arena->is_exclusive) { + _mi_warning_message("cannot unload a non-exclusive arena (id %zu at %p)\n", arena_id, arena); + return false; + } + else if (arena->memid.memkind != MI_MEM_EXTERNAL) { + _mi_warning_message("can only unload managed arena's for external memory (id %zu at %p)\n", arena_id, arena); + return false; + } + + // find accessed size + const size_t asize = mi_size_of_slices(mi_arena_used_slices(arena)); + if (base != NULL) { *base = (void*)arena; } + if (full_size != NULL) { *full_size = arena->memid.mem.os.size; } + if (accessed_size != NULL) { *accessed_size = asize; } + + // adjust abandoned page count + mi_subproc_t* const subproc = arena->subproc; + for (size_t bin = 0; bin < MI_BIN_COUNT; bin++) { + const size_t count = mi_bitmap_popcount(arena->pages_abandoned[bin]); + if (count > 0) { mi_atomic_decrement_acq_rel(&subproc->abandoned_count[bin]); } + } + + // unregister the pages + _mi_page_map_unregister_range(arena, asize); + + // set arena entry to NULL + const size_t count = mi_arenas_get_count(subproc); + for(size_t i = 0; i < count; i++) { + if (mi_arena_from_index(subproc, i) == arena) { + mi_atomic_store_ptr_release(mi_arena_t, &subproc->arenas[i], NULL); + if (i + 1 == count) { // try adjust the count? 
+ size_t expected = count; + mi_atomic_cas_strong_acq_rel(&subproc->arena_count, &expected, count-1); + } + break; + } + } + return true; +} + +mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_commit_fun_t* commit_fun, void* commit_fun_arg, mi_arena_id_t* arena_id) { + // assume the memory area is already containing the arena + if (arena_id != NULL) { *arena_id = _mi_arena_id_none(); } + if (start == NULL || size == 0) return false; + mi_arena_t* arena = (mi_arena_t*)start; + mi_memid_t memid = arena->memid; + if (memid.memkind != MI_MEM_EXTERNAL) { + _mi_warning_message("can only reload arena's from external memory (%p)\n", arena); + return false; + } + if (memid.mem.os.base != start) { + _mi_warning_message("the reloaded arena base address differs from the external memory (arena: %p, external: %p)\n", arena, start); + return false; + } + if (memid.mem.os.size != size) { + _mi_warning_message("the reloaded arena size differs from the external memory (arena size: %zu, external size: %zu)\n", arena->memid.mem.os.size, size); + return false; + } + if (!arena->is_exclusive) { + _mi_warning_message("the reloaded arena is not exclusive\n"); + return false; + } + + // re-initialize + arena->is_exclusive = true; + arena->commit_fun = commit_fun; + arena->commit_fun_arg = commit_fun_arg; + arena->subproc = _mi_subproc(); + if (!mi_arenas_add(arena->subproc, arena, arena_id)) { + return false; + } + if (!mi_arena_pages_reregister(arena)) { + // todo: clear arena entry in the subproc? + return false; + } + + // adjust abandoned page count + for (size_t bin = 0; bin < MI_BIN_COUNT; bin++) { + const size_t count = mi_bitmap_popcount(arena->pages_abandoned[bin]); + if (count > 0) { mi_atomic_decrement_acq_rel(&arena->subproc->abandoned_count[bin]); } + } + + return true; +} + +*/ diff --git a/3rd/mimalloc-3.2.6/src/bitmap.c b/3rd/mimalloc-3.2.6/src/bitmap.c new file mode 100644 index 00000000..b90f5b18 --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/bitmap.c @@ -0,0 +1,1882 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2024 Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- +Concurrent bitmap that can set/reset sequences of bits atomically +---------------------------------------------------------------------------- */ + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/bits.h" +#include "bitmap.h" + +#ifndef MI_OPT_SIMD +#define MI_OPT_SIMD 0 +#endif + +/* -------------------------------------------------------------------------------- + bfields +-------------------------------------------------------------------------------- */ + +static inline size_t mi_bfield_ctz(mi_bfield_t x) { + return mi_ctz(x); +} + +static inline size_t mi_bfield_clz(mi_bfield_t x) { + return mi_clz(x); +} + +static inline size_t mi_bfield_popcount(mi_bfield_t x) { + return mi_popcount(x); +} + +static inline mi_bfield_t mi_bfield_clear_least_bit(mi_bfield_t x) { + return (x & (x-1)); +} + +// find the least significant bit that is set (i.e. 
count trailing zero's) +// return false if `x==0` (with `*idx` undefined) and true otherwise, +// with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`). +static inline bool mi_bfield_find_least_bit(mi_bfield_t x, size_t* idx) { + return mi_bsf(x,idx); +} + +// find the most significant bit that is set. +// return false if `x==0` (with `*idx` undefined) and true otherwise, +// with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`). +static inline bool mi_bfield_find_highest_bit(mi_bfield_t x, size_t* idx) { + return mi_bsr(x, idx); +} + + + +// find each set bit in a bit field `x` and clear it, until it becomes zero. +static inline bool mi_bfield_foreach_bit(mi_bfield_t* x, size_t* idx) { + const bool found = mi_bfield_find_least_bit(*x, idx); + *x = mi_bfield_clear_least_bit(*x); + return found; +} + +static inline mi_bfield_t mi_bfield_zero(void) { + return 0; +} + +static inline mi_bfield_t mi_bfield_one(void) { + return 1; +} + +static inline mi_bfield_t mi_bfield_all_set(void) { + return ~((mi_bfield_t)0); +} + +// mask of `bit_count` bits set shifted to the left by `shiftl` +static inline mi_bfield_t mi_bfield_mask(size_t bit_count, size_t shiftl) { + mi_assert_internal(bit_count > 0); + mi_assert_internal(bit_count + shiftl <= MI_BFIELD_BITS); + const mi_bfield_t mask0 = (bit_count < MI_BFIELD_BITS ? (mi_bfield_one() << bit_count)-1 : mi_bfield_all_set()); + return (mask0 << shiftl); +} + + +// ------- mi_bfield_atomic_set --------------------------------------- +// the `_set` functions return also the count of bits that were already set (for commit statistics) +// the `_clear` functions return also whether the new bfield is all clear or not (for the chunk_map) + +// Set a bit atomically. Returns `true` if the bit transitioned from 0 to 1 +static inline bool mi_bfield_atomic_set(_Atomic(mi_bfield_t)*b, size_t idx) { + mi_assert_internal(idx < MI_BFIELD_BITS); + const mi_bfield_t mask = mi_bfield_mask(1, idx);; + const mi_bfield_t old = mi_atomic_or_acq_rel(b, mask); + return ((old&mask) == 0); +} + +// Clear a bit atomically. Returns `true` if the bit transitioned from 1 to 0. +// `all_clear` is set if the new bfield is zero. +static inline bool mi_bfield_atomic_clear(_Atomic(mi_bfield_t)*b, size_t idx, bool* all_clear) { + mi_assert_internal(idx < MI_BFIELD_BITS); + const mi_bfield_t mask = mi_bfield_mask(1, idx);; + mi_bfield_t old = mi_atomic_and_acq_rel(b, ~mask); + if (all_clear != NULL) { *all_clear = ((old&~mask)==0); } + return ((old&mask) == mask); +} + +// Clear a bit but only when/once it is set. This is used by concurrent free's while +// the page is abandoned and mapped. This can incure a busy wait :-( but it should +// happen almost never (and is accounted for in the stats) +static inline void mi_bfield_atomic_clear_once_set(_Atomic(mi_bfield_t)*b, size_t idx) { + mi_assert_internal(idx < MI_BFIELD_BITS); + const mi_bfield_t mask = mi_bfield_mask(1, idx);; + mi_bfield_t old = mi_atomic_load_relaxed(b); + do { + if mi_unlikely((old&mask) == 0) { + old = mi_atomic_load_acquire(b); + if ((old&mask)==0) { + mi_subproc_stat_counter_increase(_mi_subproc(), pages_unabandon_busy_wait, 1); + } + while ((old&mask)==0) { // busy wait + mi_atomic_yield(); + old = mi_atomic_load_acquire(b); + } + } + } while (!mi_atomic_cas_weak_acq_rel(b,&old, (old&~mask))); + mi_assert_internal((old&mask)==mask); // we should only clear when it was set +} + +// Set a mask set of bits atomically, and return true of the mask bits transitioned from all 0's to 1's. 
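
[Editor's note — illustration only, not part of the imported mimalloc sources or this patch. A minimal standalone sketch of the bfield idea used by the helpers above: build a mask of `count` bits at `shift`, OR it in atomically, and report whether the bit transitioned from 0 to 1. Written with C11 atomics; `bfield_mask`/`bfield_set` are my names, not mimalloc API.]

```
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef uint64_t bfield_t;
#define BFIELD_BITS 64

/* mask of `count` bits shifted left by `shift` (mirrors mi_bfield_mask above) */
static bfield_t bfield_mask(size_t count, size_t shift) {
  assert(count > 0 && count + shift <= BFIELD_BITS);
  const bfield_t m = (count < BFIELD_BITS ? ((bfield_t)1 << count) - 1 : ~(bfield_t)0);
  return (m << shift);
}

/* set one bit and report whether it transitioned 0 -> 1 (as mi_bfield_atomic_set does) */
static bool bfield_set(_Atomic(bfield_t)* b, size_t idx) {
  const bfield_t mask = bfield_mask(1, idx);
  const bfield_t old = atomic_fetch_or_explicit(b, mask, memory_order_acq_rel);
  return ((old & mask) == 0);
}

int main(void) {
  _Atomic(bfield_t) b = 0;
  assert(bfield_set(&b, 5));   /* first set: reports the transition */
  assert(!bfield_set(&b, 5));  /* already set: no transition */
  return 0;
}
```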
+// `already_set` contains the count of bits that were already set (used when committing ranges to account +// statistics correctly). +static inline bool mi_bfield_atomic_set_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, size_t* already_set) { + mi_assert_internal(mask != 0); + mi_bfield_t old = mi_atomic_load_relaxed(b); + while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask)) {}; // try to atomically set the mask bits until success + if (already_set!=NULL) { *already_set = mi_bfield_popcount(old&mask); } + return ((old&mask) == 0); +} + +// Clear a mask set of bits atomically, and return true of the mask bits transitioned from all 1's to 0's +// `all_clear` is set to `true` if the new bfield became zero. +static inline bool mi_bfield_atomic_clear_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* all_clear) { + mi_assert_internal(mask != 0); + mi_bfield_t old = mi_atomic_load_relaxed(b); + while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask)) {}; // try to atomically clear the mask bits until success + if (all_clear != NULL) { *all_clear = ((old&~mask)==0); } + return ((old&mask) == mask); +} + +static inline bool mi_bfield_atomic_setX(_Atomic(mi_bfield_t)*b, size_t* already_set) { + const mi_bfield_t old = mi_atomic_exchange_release(b, mi_bfield_all_set()); + if (already_set!=NULL) { *already_set = mi_bfield_popcount(old); } + return (old==0); +} + +// static inline bool mi_bfield_atomic_clearX(_Atomic(mi_bfield_t)*b, bool* all_clear) { +// const mi_bfield_t old = mi_atomic_exchange_release(b, mi_bfield_zero()); +// if (all_clear!=NULL) { *all_clear = true; } +// return (~old==0); +// } + +// ------- mi_bfield_atomic_try_clear --------------------------------------- + + +// Tries to clear a mask atomically, and returns true if the mask bits atomically transitioned from mask to 0 +// and false otherwise (leaving the bit field as is). +// `all_clear` is set to `true` if the new bfield became zero. +static inline bool mi_bfield_atomic_try_clear_mask_of(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, mi_bfield_t expect, bool* all_clear) { + mi_assert_internal(mask != 0); + // try to atomically clear the mask bits + do { + if ((expect & mask) != mask) { // are all bits still set? + if (all_clear != NULL) { *all_clear = (expect == 0); } + return false; + } + } while (!mi_atomic_cas_weak_acq_rel(b, &expect, expect & ~mask)); + if (all_clear != NULL) { *all_clear = ((expect & ~mask) == 0); } + return true; +} + +static inline bool mi_bfield_atomic_try_clear_mask(_Atomic(mi_bfield_t)* b, mi_bfield_t mask, bool* all_clear) { + mi_assert_internal(mask != 0); + const mi_bfield_t expect = mi_atomic_load_relaxed(b); + return mi_bfield_atomic_try_clear_mask_of(b, mask, expect, all_clear); +} + +// Tries to clear a bit atomically. Returns `true` if the bit transitioned from 1 to 0 +// and `false` otherwise leaving the bfield `b` as-is. +// `all_clear` is set to true if the new bfield became zero (and false otherwise) +mi_decl_maybe_unused static inline bool mi_bfield_atomic_try_clear(_Atomic(mi_bfield_t)* b, size_t idx, bool* all_clear) { + mi_assert_internal(idx < MI_BFIELD_BITS); + const mi_bfield_t mask = mi_bfield_one()<bfields[i], idx); + if (already_set != NULL) { *already_set = (was_clear ? 0 : 1); } + return was_clear; +} + +// Set `0 < n <= MI_BFIELD_BITS`, and return true of the mask bits transitioned from all 0's to 1's. +// `already_set` contains the count of bits that were already set (used when committing ranges to account +// statistics correctly). +// Can cross over two bfields. 
+static inline bool mi_bchunk_setNX(mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* already_set) { + mi_assert_internal(cidx < MI_BCHUNK_BITS); + mi_assert_internal(n > 0 && n <= MI_BFIELD_BITS); + const size_t i = cidx / MI_BFIELD_BITS; + const size_t idx = cidx % MI_BFIELD_BITS; + if mi_likely(idx + n <= MI_BFIELD_BITS) { + // within one field + return mi_bfield_atomic_set_mask(&chunk->bfields[i], mi_bfield_mask(n,idx), already_set); + } + else { + // spanning two fields + const size_t m = MI_BFIELD_BITS - idx; // bits to clear in the first field + mi_assert_internal(m < n); + mi_assert_internal(i < MI_BCHUNK_FIELDS - 1); + mi_assert_internal(idx + m <= MI_BFIELD_BITS); + size_t already_set1; + const bool all_set1 = mi_bfield_atomic_set_mask(&chunk->bfields[i], mi_bfield_mask(m, idx), &already_set1); + mi_assert_internal(n - m > 0); + mi_assert_internal(n - m < MI_BFIELD_BITS); + size_t already_set2; + const bool all_set2 = mi_bfield_atomic_set_mask(&chunk->bfields[i+1], mi_bfield_mask(n - m, 0), &already_set2); + if (already_set != NULL) { *already_set = already_set1 + already_set2; } + return (all_set1 && all_set2); + } +} + +// Set a sequence of `n` bits within a chunk. +// Returns true if all bits transitioned from 0 to 1 (or 1 to 0). +mi_decl_noinline static bool mi_bchunk_xsetNC(mi_xset_t set, mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* palready_set, bool* pmaybe_all_clear) { + mi_assert_internal(cidx + n <= MI_BCHUNK_BITS); + mi_assert_internal(n>0); + bool all_transition = true; + bool maybe_all_clear = true; + size_t total_already_set = 0; + size_t idx = cidx % MI_BFIELD_BITS; + size_t field = cidx / MI_BFIELD_BITS; + while (n > 0) { + size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field + if (m > n) { m = n; } + mi_assert_internal(idx + m <= MI_BFIELD_BITS); + mi_assert_internal(field < MI_BCHUNK_FIELDS); + const mi_bfield_t mask = mi_bfield_mask(m, idx); + size_t already_set = 0; + bool all_clear = false; + const bool transition = (set ? 
mi_bfield_atomic_set_mask(&chunk->bfields[field], mask, &already_set) + : mi_bfield_atomic_clear_mask(&chunk->bfields[field], mask, &all_clear)); + mi_assert_internal((transition && already_set == 0) || (!transition && already_set > 0)); + all_transition = all_transition && transition; + total_already_set += already_set; + maybe_all_clear = maybe_all_clear && all_clear; + // next field + field++; + idx = 0; + mi_assert_internal(m <= n); + n -= m; + } + if (palready_set!=NULL) { *palready_set = total_already_set; } + if (pmaybe_all_clear!=NULL) { *pmaybe_all_clear = maybe_all_clear; } + return all_transition; +} + +static inline bool mi_bchunk_setN(mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* already_set) { + mi_assert_internal(n>0 && n <= MI_BCHUNK_BITS); + if (n==1) return mi_bchunk_set(chunk, cidx, already_set); + // if (n==8 && (cidx%8) == 0) return mi_bchunk_set8(chunk, cidx, already_set); + // if (n==MI_BFIELD_BITS) return mi_bchunk_setX(chunk, cidx, already_set); + if (n<=MI_BFIELD_BITS) return mi_bchunk_setNX(chunk, cidx, n, already_set); + return mi_bchunk_xsetNC(MI_BIT_SET, chunk, cidx, n, already_set, NULL); +} + +// ------- mi_bchunk_clear --------------------------------------- + +static inline bool mi_bchunk_clear(mi_bchunk_t* chunk, size_t cidx, bool* all_clear) { + mi_assert_internal(cidx < MI_BCHUNK_BITS); + const size_t i = cidx / MI_BFIELD_BITS; + const size_t idx = cidx % MI_BFIELD_BITS; + return mi_bfield_atomic_clear(&chunk->bfields[i], idx, all_clear); +} + +static inline bool mi_bchunk_clearN(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* maybe_all_clear) { + mi_assert_internal(n>0 && n <= MI_BCHUNK_BITS); + if (n==1) return mi_bchunk_clear(chunk, cidx, maybe_all_clear); + // if (n==8) return mi_bchunk_clear8(chunk, cidx, maybe_all_clear); + // if (n==MI_BFIELD_BITS) return mi_bchunk_clearX(chunk, cidx, maybe_all_clear); + // TODO: implement mi_bchunk_xsetNX instead of setNX + return mi_bchunk_xsetNC(MI_BIT_CLEAR, chunk, cidx, n, NULL, maybe_all_clear); +} + +// Check if a sequence of `n` bits within a chunk are all set/cleared. +// This can cross bfield's +mi_decl_noinline static size_t mi_bchunk_popcountNC(mi_bchunk_t* chunk, size_t field_idx, size_t idx, size_t n) { + mi_assert_internal((field_idx*MI_BFIELD_BITS) + idx + n <= MI_BCHUNK_BITS); + size_t count = 0; + while (n > 0) { + size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field + if (m > n) { m = n; } + mi_assert_internal(idx + m <= MI_BFIELD_BITS); + mi_assert_internal(field_idx < MI_BCHUNK_FIELDS); + const size_t mask = mi_bfield_mask(m, idx); + count += mi_bfield_atomic_popcount_mask(&chunk->bfields[field_idx], mask); + // next field + field_idx++; + idx = 0; + n -= m; + } + return count; +} + +// Count set bits a sequence of `n` bits. +static inline size_t mi_bchunk_popcountN(mi_bchunk_t* chunk, size_t cidx, size_t n) { + mi_assert_internal(cidx + n <= MI_BCHUNK_BITS); + mi_assert_internal(n>0); + if (n==0) return 0; + const size_t i = cidx / MI_BFIELD_BITS; + const size_t idx = cidx % MI_BFIELD_BITS; + if (n==1) { return (mi_bfield_atomic_is_set(&chunk->bfields[i], idx) ? 1 : 0); } + if (idx + n <= MI_BFIELD_BITS) { return mi_bfield_atomic_popcount_mask(&chunk->bfields[i], mi_bfield_mask(n, idx)); } + return mi_bchunk_popcountNC(chunk, i, idx, n); +} + + +// ------- mi_bchunk_is_xset --------------------------------------- + +// Check if a sequence of `n` bits within a chunk are all set/cleared. 
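
[Editor's note — illustration only, not part of the imported sources or this patch. The chunk operations above split an `n`-bit range into per-bfield pieces of `min(MI_BFIELD_BITS - idx, n)` bits. A non-atomic sketch of that split over plain 64-bit words; the names are mine.]

```
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define FIELD_BITS 64

/* non-atomic sketch of the per-bfield split used by the chunk operations above:
   take m = min(FIELD_BITS - idx, n) bits from each field in turn */
static void set_bits(uint64_t* fields, size_t cidx, size_t n) {
  size_t field = cidx / FIELD_BITS;
  size_t idx   = cidx % FIELD_BITS;
  while (n > 0) {
    size_t m = FIELD_BITS - idx;
    if (m > n) { m = n; }
    const uint64_t mask = (m < FIELD_BITS ? (UINT64_C(1) << m) - 1 : ~UINT64_C(0)) << idx;
    fields[field] |= mask;
    field++; idx = 0; n -= m;
  }
}

int main(void) {
  uint64_t fields[2] = { 0, 0 };
  set_bits(fields, 60, 8);                      /* crosses the field boundary: 4 + 4 bits */
  assert(fields[0] == (UINT64_C(0xF) << 60));
  assert(fields[1] == UINT64_C(0xF));
  return 0;
}
```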
+// This can cross bfield's +mi_decl_noinline static bool mi_bchunk_is_xsetNC(mi_xset_t set, const mi_bchunk_t* chunk, size_t field_idx, size_t idx, size_t n) { + mi_assert_internal((field_idx*MI_BFIELD_BITS) + idx + n <= MI_BCHUNK_BITS); + while (n > 0) { + size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field + if (m > n) { m = n; } + mi_assert_internal(idx + m <= MI_BFIELD_BITS); + mi_assert_internal(field_idx < MI_BCHUNK_FIELDS); + const size_t mask = mi_bfield_mask(m, idx); + if (!mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[field_idx], mask)) { + return false; + } + // next field + field_idx++; + idx = 0; + n -= m; + } + return true; +} + +// Check if a sequence of `n` bits within a chunk are all set/cleared. +static inline bool mi_bchunk_is_xsetN(mi_xset_t set, const mi_bchunk_t* chunk, size_t cidx, size_t n) { + mi_assert_internal(cidx + n <= MI_BCHUNK_BITS); + mi_assert_internal(n>0); + if (n==0) return true; + const size_t i = cidx / MI_BFIELD_BITS; + const size_t idx = cidx % MI_BFIELD_BITS; + if (n==1) { return mi_bfield_atomic_is_xset(set, &chunk->bfields[i], idx); } + if (idx + n <= MI_BFIELD_BITS) { return mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[i], mi_bfield_mask(n, idx)); } + return mi_bchunk_is_xsetNC(set, chunk, i, idx, n); +} + + +// ------- mi_bchunk_try_clear --------------------------------------- + +// Clear `0 < n <= MI_BITFIELD_BITS`. Can cross over a bfield boundary. +static inline bool mi_bchunk_try_clearNX(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* pmaybe_all_clear) { + mi_assert_internal(cidx < MI_BCHUNK_BITS); + mi_assert_internal(n <= MI_BFIELD_BITS); + const size_t i = cidx / MI_BFIELD_BITS; + const size_t idx = cidx % MI_BFIELD_BITS; + if mi_likely(idx + n <= MI_BFIELD_BITS) { + // within one field + return mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], mi_bfield_mask(n, idx), pmaybe_all_clear); + } + else { + // spanning two fields (todo: use double-word atomic ops?) + const size_t m = MI_BFIELD_BITS - idx; // bits to clear in the first field + mi_assert_internal(m < n); + mi_assert_internal(i < MI_BCHUNK_FIELDS - 1); + bool field1_is_clear; + if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], mi_bfield_mask(m, idx), &field1_is_clear)) return false; + // try the second field as well + mi_assert_internal(n - m > 0); + mi_assert_internal(n - m < MI_BFIELD_BITS); + bool field2_is_clear; + if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[i+1], mi_bfield_mask(n - m, 0), &field2_is_clear)) { + // we failed to clear the second field, restore the first one + mi_bfield_atomic_set_mask(&chunk->bfields[i], mi_bfield_mask(m, idx), NULL); + return false; + } + if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = field1_is_clear && field2_is_clear; } + return true; + } +} + +// Clear a full aligned bfield. +// static inline bool mi_bchunk_try_clearX(mi_bchunk_t* chunk, size_t cidx, bool* pmaybe_all_clear) { +// mi_assert_internal(cidx < MI_BCHUNK_BITS); +// mi_assert_internal((cidx%MI_BFIELD_BITS) == 0); +// const size_t i = cidx / MI_BFIELD_BITS; +// return mi_bfield_atomic_try_clearX(&chunk->bfields[i], pmaybe_all_clear); +// } + +// Try to atomically clear a sequence of `n` bits within a chunk. +// Returns true if all bits transitioned from 1 to 0, +// and false otherwise leaving all bit fields as is. +// Note: this is the complex one as we need to unwind partial atomic operations if we fail halfway.. +// `maybe_all_clear` is set to `true` if all the bfields involved become zero. 
+mi_decl_noinline static bool mi_bchunk_try_clearNC(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* pmaybe_all_clear) { + mi_assert_internal(cidx + n <= MI_BCHUNK_BITS); + mi_assert_internal(n>0); + if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = true; } + if (n==0) return true; + + // first field + const size_t start_idx = cidx % MI_BFIELD_BITS; + const size_t start_field = cidx / MI_BFIELD_BITS; + size_t field = start_field; + size_t m = MI_BFIELD_BITS - start_idx; // m are the bits to clear in this field + if (m > n) { m = n; } + mi_assert_internal(start_idx + m <= MI_BFIELD_BITS); + mi_assert_internal(start_field < MI_BCHUNK_FIELDS); + const mi_bfield_t mask_start = mi_bfield_mask(m, start_idx); + bool maybe_all_clear; + if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_start, &maybe_all_clear)) return false; + + // done? + mi_assert_internal(m <= n); + n -= m; + + // continue with mid fields and last field: if these fail we need to recover by unsetting previous fields + // mid fields? + while (n >= MI_BFIELD_BITS) { + field++; + mi_assert_internal(field < MI_BCHUNK_FIELDS); + bool field_is_clear; + if (!mi_bfield_atomic_try_clearX(&chunk->bfields[field], &field_is_clear)) goto restore; + maybe_all_clear = maybe_all_clear && field_is_clear; + n -= MI_BFIELD_BITS; + } + + // last field? + if (n > 0) { + mi_assert_internal(n < MI_BFIELD_BITS); + field++; + mi_assert_internal(field < MI_BCHUNK_FIELDS); + const mi_bfield_t mask_end = mi_bfield_mask(n, 0); + bool field_is_clear; + if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_end, &field_is_clear)) goto restore; + maybe_all_clear = maybe_all_clear && field_is_clear; + } + + if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = maybe_all_clear; } + return true; + +restore: + // `field` is the index of the field that failed to set atomically; we need to restore all previous fields + mi_assert_internal(field > start_field); + while( field > start_field) { + field--; + if (field == start_field) { + mi_bfield_atomic_set_mask(&chunk->bfields[field], mask_start, NULL); + } + else { + mi_bfield_atomic_setX(&chunk->bfields[field], NULL); // mid-field: set all bits again + } + } + return false; +} + + +static inline bool mi_bchunk_try_clearN(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* maybe_all_clear) { + mi_assert_internal(n>0); + // if (n==MI_BFIELD_BITS) return mi_bchunk_try_clearX(chunk, cidx, maybe_all_clear); + if (n<=MI_BFIELD_BITS) return mi_bchunk_try_clearNX(chunk, cidx, n, maybe_all_clear); + return mi_bchunk_try_clearNC(chunk, cidx, n, maybe_all_clear); +} + + +// ------- mi_bchunk_try_find_and_clear --------------------------------------- + +#if MI_OPT_SIMD && defined(__AVX2__) +mi_decl_maybe_unused static inline __m256i mi_mm256_zero(void) { + return _mm256_setzero_si256(); +} +mi_decl_maybe_unused static inline __m256i mi_mm256_ones(void) { + return _mm256_set1_epi64x(~0); +} +mi_decl_maybe_unused static inline bool mi_mm256_is_ones(__m256i vec) { + return _mm256_testc_si256(vec, _mm256_cmpeq_epi32(vec, vec)); +} +mi_decl_maybe_unused static inline bool mi_mm256_is_zero( __m256i vec) { + return _mm256_testz_si256(vec,vec); +} +#endif + +static inline bool mi_bchunk_try_find_and_clear_at(mi_bchunk_t* chunk, size_t chunk_idx, size_t* pidx) { + mi_assert_internal(chunk_idx < MI_BCHUNK_FIELDS); + // note: this must be acquire (and not relaxed), or otherwise the AVX code below can loop forever + // as the compiler won't reload the registers vec1 and vec2 from memory again. 
+ const mi_bfield_t b = mi_atomic_load_acquire(&chunk->bfields[chunk_idx]); + size_t idx; + if (mi_bfield_find_least_bit(b, &idx)) { // find the least bit + if mi_likely(mi_bfield_atomic_try_clear_mask_of(&chunk->bfields[chunk_idx], mi_bfield_mask(1,idx), b, NULL)) { // clear it atomically + *pidx = (chunk_idx*MI_BFIELD_BITS) + idx; + mi_assert_internal(*pidx < MI_BCHUNK_BITS); + return true; + } + } + return false; +} + +// Find least 1-bit in a chunk and try to clear it atomically +// set `*pidx` to the bit index (0 <= *pidx < MI_BCHUNK_BITS) on success. +// This is used to find free slices and abandoned pages and should be efficient. +// todo: try neon version +static inline bool mi_bchunk_try_find_and_clear(mi_bchunk_t* chunk, size_t* pidx) { + #if MI_OPT_SIMD && defined(__AVX2__) && (MI_BCHUNK_BITS==256) + while (true) { + const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); + const __m256i vcmp = _mm256_cmpeq_epi64(vec, mi_mm256_zero()); // (elem64 == 0 ? 0xFF : 0) + const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits are all set or clear) + // mask is inverted, so each 8-bits is 0xFF iff the corresponding elem64 has a bit set (and thus can be cleared) + if (mask==0) return false; + mi_assert_internal((_tzcnt_u32(mask)%8) == 0); // tzcnt == 0, 8, 16, or 24 + const size_t chunk_idx = _tzcnt_u32(mask) / 8; + if (mi_bchunk_try_find_and_clear_at(chunk, chunk_idx, pidx)) return true; + // try again + // note: there must be an atomic release/acquire in between or otherwise the registers may not be reloaded + } + #elif MI_OPT_SIMD && defined(__AVX2__) && (MI_BCHUNK_BITS==512) + while (true) { + size_t chunk_idx = 0; + #if 0 + // one vector at a time + __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); + if (mi_mm256_is_zero(vec)) { + chunk_idx += 4; + vec = _mm256_load_si256(((const __m256i*)chunk->bfields) + 1); + } + const __m256i vcmp = _mm256_cmpeq_epi64(vec, mi_mm256_zero()); // (elem64 == 0 ? 0xFF : 0) + const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits are all set or clear) + // mask is inverted, so each 8-bits is 0xFF iff the corresponding elem64 has a bit set (and thus can be cleared) + if (mask==0) return false; + mi_assert_internal((_tzcnt_u32(mask)%8) == 0); // tzcnt == 0, 8, 16, or 24 + chunk_idx += _tzcnt_u32(mask) / 8; + #else + // a cache line is 64b so we can just as well load all at the same time + const __m256i vec1 = _mm256_load_si256((const __m256i*)chunk->bfields); + const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1); + const __m256i cmpv = mi_mm256_zero(); + const __m256i vcmp1 = _mm256_cmpeq_epi64(vec1, cmpv); // (elem64 == 0 ? 0xFF : 0) + const __m256i vcmp2 = _mm256_cmpeq_epi64(vec2, cmpv); // (elem64 == 0 ? 0xFF : 0) + const uint32_t mask1 = ~_mm256_movemask_epi8(vcmp1); // mask of most significant bit of each byte (so each 8 bits are all set or clear) + const uint32_t mask2 = ~_mm256_movemask_epi8(vcmp2); // mask of most significant bit of each byte (so each 8 bits are all set or clear) + const uint64_t mask = ((uint64_t)mask2 << 32) | mask1; + // mask is inverted, so each 8-bits is 0xFF iff the corresponding elem64 has a bit set (and thus can be cleared) + if (mask==0) return false; + mi_assert_internal((_tzcnt_u64(mask)%8) == 0); // tzcnt == 0, 8, 16, 24 , .. 
+ chunk_idx = mi_ctz(mask) / 8; + #endif + if (mi_bchunk_try_find_and_clear_at(chunk, chunk_idx, pidx)) return true; + // try again + // note: there must be an atomic release/acquire in between or otherwise the registers may not be reloaded + } + #elif MI_OPT_SIMD && (MI_BCHUNK_BITS==512) && MI_ARCH_ARM64 + while(true) { + // a cache line is 64b so we can just as well load all at the same time (?) + const uint64x2_t vzero1_lo = vceqzq_u64(vld1q_u64((uint64_t*)chunk->bfields)); // 2x64 bit is_zero + const uint64x2_t vzero1_hi = vceqzq_u64(vld1q_u64((uint64_t*)chunk->bfields + 2)); // 2x64 bit is_zero + const uint64x2_t vzero2_lo = vceqzq_u64(vld1q_u64((uint64_t*)chunk->bfields + 4)); // 2x64 bit is_zero + const uint64x2_t vzero2_hi = vceqzq_u64(vld1q_u64((uint64_t*)chunk->bfields + 6)); // 2x64 bit is_zero + const uint32x4_t vzero1 = vuzp1q_u32(vreinterpretq_u32_u64(vzero1_lo),vreinterpretq_u32_u64(vzero1_hi)); // unzip even elements: narrow to 4x32 bit is_zero () + const uint32x4_t vzero2 = vuzp1q_u32(vreinterpretq_u32_u64(vzero2_lo),vreinterpretq_u32_u64(vzero2_hi)); // unzip even elements: narrow to 4x32 bit is_zero () + const uint32x4_t vzero1x = vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(vzero1), 24)); // shift-right 2x32bit elem by 24: lo 16 bits contain the 2 lo bytes + const uint32x4_t vzero2x = vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(vzero2), 24)); + const uint16x8_t vzero12 = vreinterpretq_u16_u32(vuzp1q_u32(vzero1x,vzero2x)); // unzip even 32-bit elements into one vector + const uint8x8_t vzero = vmovn_u16(vzero12); // narrow the bottom 16-bits + const uint64_t mask = ~vget_lane_u64(vreinterpret_u64_u8(vzero), 0); // 1 byte for each bfield (0xFF => bfield has a bit set) + if (mask==0) return false; + mi_assert_internal((mi_ctz(mask)%8) == 0); // tzcnt == 0, 8, 16, 24 , .. + const size_t chunk_idx = mi_ctz(mask) / 8; + if (mi_bchunk_try_find_and_clear_at(chunk, chunk_idx, pidx)) return true; + // try again + // note: there must be an atomic release/acquire in between or otherwise the registers may not be reloaded + } + #else + for (int i = 0; i < MI_BCHUNK_FIELDS; i++) { + if (mi_bchunk_try_find_and_clear_at(chunk, i, pidx)) return true; + } + return false; + #endif +} + +static inline bool mi_bchunk_try_find_and_clear_1(mi_bchunk_t* chunk, size_t n, size_t* pidx) { + mi_assert_internal(n==1); MI_UNUSED(n); + return mi_bchunk_try_find_and_clear(chunk, pidx); +} + +mi_decl_maybe_unused static inline bool mi_bchunk_try_find_and_clear8_at(mi_bchunk_t* chunk, size_t chunk_idx, size_t* pidx) { + const mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[chunk_idx]); + // has_set8 has low bit in each byte set if the byte in x == 0xFF + const mi_bfield_t has_set8 = + ((~b - MI_BFIELD_LO_BIT8) & // high bit set if byte in x is 0xFF or < 0x7F + (b & MI_BFIELD_HI_BIT8)) // high bit set if byte in x is >= 0x80 + >> 7; // shift high bit to low bit + size_t idx; + if (mi_bfield_find_least_bit(has_set8, &idx)) { // find least 1-bit + mi_assert_internal(idx <= (MI_BFIELD_BITS - 8)); + mi_assert_internal((idx%8)==0); + if mi_likely(mi_bfield_atomic_try_clear_mask_of(&chunk->bfields[chunk_idx], (mi_bfield_t)0xFF << idx, b, NULL)) { // unset the byte atomically + *pidx = (chunk_idx*MI_BFIELD_BITS) + idx; + mi_assert_internal(*pidx + 8 <= MI_BCHUNK_BITS); + return true; + } + } + return false; +} + +// find least aligned byte in a chunk with all bits set, and try unset it atomically +// set `*pidx` to its bit index (0 <= *pidx < MI_BCHUNK_BITS) on success. 
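
[Editor's note — illustration only, not part of the imported sources or this patch. The `has_set8` expression above is the classic SWAR zero-byte test applied to `~b`, so its lowest indicator bit marks the least-significant byte of `b` that is 0xFF. A standalone sketch under that reading; it assumes GCC/Clang builtins, and `lowest_full_byte`/`LO8`/`HI8` are my names standing in for the MI_BFIELD_LO_BIT8/HI_BIT8 constants.]

```
#include <assert.h>
#include <stdint.h>

#define LO8 UINT64_C(0x0101010101010101)
#define HI8 UINT64_C(0x8080808080808080)

/* Lowest set indicator bit marks the least-significant 0xFF byte of b.
   Indicator bits above the first hit may be spurious (borrow propagation),
   which is fine when only the least one is taken, as the code above does. */
static int lowest_full_byte(uint64_t b) {   /* byte index, or -1 if none */
  const uint64_t has_set8 = ((~b - LO8) & (b & HI8)) >> 7;
  if (has_set8 == 0) return -1;
  return (int)(__builtin_ctzll(has_set8) / 8);
}

int main(void) {
  assert(lowest_full_byte(UINT64_C(0x00FF000000FF0000)) == 2);
  assert(lowest_full_byte(UINT64_C(0x123456789ABCDEF0)) == -1);
  return 0;
}
```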
+// Used to find medium size pages in the free blocks. +// todo: try neon version +static mi_decl_noinline bool mi_bchunk_try_find_and_clear8(mi_bchunk_t* chunk, size_t* pidx) { + #if MI_OPT_SIMD && defined(__AVX2__) && (MI_BCHUNK_BITS==512) + while (true) { + // since a cache-line is 64b, load all at once + const __m256i vec1 = _mm256_load_si256((const __m256i*)chunk->bfields); + const __m256i vec2 = _mm256_load_si256((const __m256i*)chunk->bfields+1); + const __m256i cmpv = mi_mm256_ones(); + const __m256i vcmp1 = _mm256_cmpeq_epi8(vec1, cmpv); // (byte == ~0 ? 0xFF : 0) + const __m256i vcmp2 = _mm256_cmpeq_epi8(vec2, cmpv); // (byte == ~0 ? 0xFF : 0) + const uint32_t mask1 = _mm256_movemask_epi8(vcmp1); // mask of most significant bit of each byte + const uint32_t mask2 = _mm256_movemask_epi8(vcmp2); // mask of most significant bit of each byte + const uint64_t mask = ((uint64_t)mask2 << 32) | mask1; + // mask is inverted, so each bit is 0xFF iff the corresponding byte has a bit set (and thus can be cleared) + if (mask==0) return false; + const size_t bidx = _tzcnt_u64(mask); // byte-idx of the byte in the chunk + const size_t chunk_idx = bidx / 8; + const size_t idx = (bidx % 8)*8; + mi_assert_internal(chunk_idx < MI_BCHUNK_FIELDS); + if mi_likely(mi_bfield_atomic_try_clear8(&chunk->bfields[chunk_idx], idx, NULL)) { // clear it atomically + *pidx = (chunk_idx*MI_BFIELD_BITS) + idx; + mi_assert_internal(*pidx + 8 <= MI_BCHUNK_BITS); + return true; + } + // try again + // note: there must be an atomic release/acquire in between or otherwise the registers may not be reloaded } + } + #else + for (int i = 0; i < MI_BCHUNK_FIELDS; i++) { + if (mi_bchunk_try_find_and_clear8_at(chunk, i, pidx)) return true; + } + return false; + #endif +} + +static inline bool mi_bchunk_try_find_and_clear_8(mi_bchunk_t* chunk, size_t n, size_t* pidx) { + mi_assert_internal(n==8); MI_UNUSED(n); + return mi_bchunk_try_find_and_clear8(chunk, pidx); +} + + +// find a sequence of `n` bits in a chunk with `0 < n <= MI_BFIELD_BITS` with all bits set, +// and try to clear them atomically. +// set `*pidx` to its bit index (0 <= *pidx <= MI_BCHUNK_BITS - n) on success. +// will cross bfield boundaries. +mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk, size_t n, size_t* pidx) { + if (n == 0 || n > MI_BFIELD_BITS) return false; + const mi_bfield_t mask = mi_bfield_mask(n, 0); + // for all fields in the chunk + for (int i = 0; i < MI_BCHUNK_FIELDS; i++) { + mi_bfield_t b0 = mi_atomic_load_relaxed(&chunk->bfields[i]); + mi_bfield_t b = b0; + size_t idx; + + // is there a range inside the field? + while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit + if (idx + n > MI_BFIELD_BITS) break; // too short: maybe cross over, or continue with the next field + + const size_t bmask = mask<>idx == mask); + if ((b&bmask) == bmask) { // found a match with all bits set, try clearing atomically + if mi_likely(mi_bfield_atomic_try_clear_mask_of(&chunk->bfields[i], bmask, b0, NULL)) { + *pidx = (i*MI_BFIELD_BITS) + idx; + mi_assert_internal(*pidx < MI_BCHUNK_BITS); + mi_assert_internal(*pidx + n <= MI_BCHUNK_BITS); + return true; + } + else { + // if we failed to atomically commit, reload b and try again from the start + b = b0 = mi_atomic_load_acquire(&chunk->bfields[i]); + } + } + else { + // advance by clearing the least run of ones, for example, with n>=4, idx=2: + // b = 1111 1101 1010 1100 + // .. 
+ (1< 0) { + const size_t pre = mi_bfield_ctz(~mi_atomic_load_relaxed(&chunk->bfields[i+1])); + if (post + pre >= n) { + // it fits -- try to claim it atomically + const size_t cidx = (i*MI_BFIELD_BITS) + (MI_BFIELD_BITS - post); + if (mi_bchunk_try_clearNX(chunk, cidx, n, NULL)) { + // we cleared all atomically + *pidx = cidx; + mi_assert_internal(*pidx < MI_BCHUNK_BITS); + mi_assert_internal(*pidx + n <= MI_BCHUNK_BITS); + return true; + } + } + } + } + } + return false; +} + +// find a sequence of `n` bits in a chunk with `n <= MI_BCHUNK_BITS` with all bits set, +// and try to clear them atomically. +// set `*pidx` to its bit index (0 <= *pidx <= MI_BCHUNK_BITS - n) on success. +// This can cross bfield boundaries. +static mi_decl_noinline bool mi_bchunk_try_find_and_clearNC(mi_bchunk_t* chunk, size_t n, size_t* pidx) { + if (n == 0 || n > MI_BCHUNK_BITS) return false; // cannot be more than a chunk + + // we first scan ahead to see if there is a range of `n` set bits, and only then try to clear atomically + mi_assert_internal(n>0); + const size_t skip_count = (n-1)/MI_BFIELD_BITS; + size_t cidx; + for (size_t i = 0; i < MI_BCHUNK_FIELDS - skip_count; i++) + { + size_t m = n; // bits to go + + // first field + mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]); + size_t ones = mi_bfield_clz(~b); + + cidx = (i*MI_BFIELD_BITS) + (MI_BFIELD_BITS - ones); // start index + if (ones >= m) { + // we found enough bits already! + m = 0; + } + else if (ones > 0) { + // keep scanning further fields until we have enough bits + m -= ones; + size_t j = 1; // field count from i + while (i+j < MI_BCHUNK_FIELDS) { + mi_assert_internal(m > 0); + b = mi_atomic_load_relaxed(&chunk->bfields[i+j]); + ones = mi_bfield_ctz(~b); + if (ones >= m) { + // we found enough bits + m = 0; + break; + } + else if (ones == MI_BFIELD_BITS) { + // not enough yet, proceed to the next field + j++; + m -= MI_BFIELD_BITS; + } + else { + // the range was not enough, start from scratch + i = i + j - 1; // no need to re-scan previous fields, except the last one (with clz this time) + mi_assert_internal(m>0); + break; + } + } + } + + // did we find a range? + if (m==0) { + if (mi_bchunk_try_clearN(chunk, cidx, n, NULL)) { + // we cleared all atomically + *pidx = cidx; + mi_assert_internal(*pidx < MI_BCHUNK_BITS); + mi_assert_internal(*pidx + n <= MI_BCHUNK_BITS); + return true; + } + // note: if we fail for a small `n` on the first field, we don't rescan that field (as `i` is incremented) + } + // otherwise continue searching + } + return false; +} + + + +// ------- mi_bchunk_clear_once_set --------------------------------------- + +static inline void mi_bchunk_clear_once_set(mi_bchunk_t* chunk, size_t cidx) { + mi_assert_internal(cidx < MI_BCHUNK_BITS); + const size_t i = cidx / MI_BFIELD_BITS; + const size_t idx = cidx % MI_BFIELD_BITS; + mi_bfield_atomic_clear_once_set(&chunk->bfields[i], idx); +} + + +// ------- mi_bitmap_all_are_clear --------------------------------------- + + +// are all bits in a bitmap chunk clear? 
+static inline bool mi_bchunk_all_are_clear_relaxed(mi_bchunk_t* chunk) { + #if MI_OPT_SIMD && defined(__AVX2__) && (MI_BCHUNK_BITS==256) + const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); + return mi_mm256_is_zero(vec); + #elif MI_OPT_SIMD && defined(__AVX2__) && (MI_BCHUNK_BITS==512) + // a 64b cache-line contains the entire chunk anyway so load both at once + const __m256i vec1 = _mm256_load_si256((const __m256i*)chunk->bfields); + const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1); + return (mi_mm256_is_zero(_mm256_or_si256(vec1,vec2))); + #elif MI_OPT_SIMD && (MI_BCHUNK_BITS==512) && MI_ARCH_ARM64 + const uint64x2_t v0 = vld1q_u64((uint64_t*)chunk->bfields); + const uint64x2_t v1 = vld1q_u64((uint64_t*)chunk->bfields + 2); + const uint64x2_t v2 = vld1q_u64((uint64_t*)chunk->bfields + 4); + const uint64x2_t v3 = vld1q_u64((uint64_t*)chunk->bfields + 6); + const uint64x2_t v = vorrq_u64(vorrq_u64(v0,v1),vorrq_u64(v2,v3)); + return (vmaxvq_u32(vreinterpretq_u32_u64(v)) == 0); + #else + for (int i = 0; i < MI_BCHUNK_FIELDS; i++) { + if (mi_atomic_load_relaxed(&chunk->bfields[i]) != 0) return false; + } + return true; + #endif +} + +// are all bits in a bitmap chunk set? +static inline bool mi_bchunk_all_are_set_relaxed(mi_bchunk_t* chunk) { +#if MI_OPT_SIMD && defined(__AVX2__) && (MI_BCHUNK_BITS==256) + const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); + return mi_mm256_is_ones(vec); +#elif MI_OPT_SIMD && defined(__AVX2__) && (MI_BCHUNK_BITS==512) + // a 64b cache-line contains the entire chunk anyway so load both at once + const __m256i vec1 = _mm256_load_si256((const __m256i*)chunk->bfields); + const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1); + return (mi_mm256_is_ones(_mm256_and_si256(vec1, vec2))); +#elif MI_OPT_SIMD && (MI_BCHUNK_BITS==512) && MI_ARCH_ARM64 + const uint64x2_t v0 = vld1q_u64((uint64_t*)chunk->bfields); + const uint64x2_t v1 = vld1q_u64((uint64_t*)chunk->bfields + 2); + const uint64x2_t v2 = vld1q_u64((uint64_t*)chunk->bfields + 4); + const uint64x2_t v3 = vld1q_u64((uint64_t*)chunk->bfields + 6); + const uint64x2_t v = vandq_u64(vandq_u64(v0,v1),vandq_u64(v2,v3)); + return (vminvq_u32(vreinterpretq_u32_u64(v)) == 0xFFFFFFFFUL); +#else + for (int i = 0; i < MI_BCHUNK_FIELDS; i++) { + if (~mi_atomic_load_relaxed(&chunk->bfields[i]) != 0) return false; + } + return true; +#endif +} + + +static bool mi_bchunk_bsr(mi_bchunk_t* chunk, size_t* pidx) { + for (size_t i = MI_BCHUNK_FIELDS; i > 0; ) { + i--; + mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]); + size_t idx; + if (mi_bsr(b, &idx)) { + *pidx = (i*MI_BFIELD_BITS) + idx; + return true; + } + } + return false; +} + +static size_t mi_bchunk_popcount(mi_bchunk_t* chunk) { + size_t popcount = 0; + for (size_t i = 0; i < MI_BCHUNK_FIELDS; i++) { + const mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]); + popcount += mi_bfield_popcount(b); + } + return popcount; +} + + +/* -------------------------------------------------------------------------------- + bitmap chunkmap +-------------------------------------------------------------------------------- */ + +static void mi_bitmap_chunkmap_set(mi_bitmap_t* bitmap, size_t chunk_idx) { + mi_assert(chunk_idx < mi_bitmap_chunk_count(bitmap)); + mi_bchunk_set(&bitmap->chunkmap, chunk_idx, NULL); +} + +static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx) { + mi_assert(chunk_idx < mi_bitmap_chunk_count(bitmap)); + // check if the corresponding 
chunk is all clear + if (!mi_bchunk_all_are_clear_relaxed(&bitmap->chunks[chunk_idx])) return false; + // clear the chunkmap bit + mi_bchunk_clear(&bitmap->chunkmap, chunk_idx, NULL); + // .. but a concurrent set may have happened in between our all-clear test and the clearing of the + // bit in the mask. We check again to catch this situation. + if (!mi_bchunk_all_are_clear_relaxed(&bitmap->chunks[chunk_idx])) { + mi_bchunk_set(&bitmap->chunkmap, chunk_idx, NULL); + return false; + } + return true; +} + + +/* -------------------------------------------------------------------------------- + bitmap +-------------------------------------------------------------------------------- */ + +size_t mi_bitmap_size(size_t bit_count, size_t* pchunk_count) { + mi_assert_internal((bit_count % MI_BCHUNK_BITS) == 0); + bit_count = _mi_align_up(bit_count, MI_BCHUNK_BITS); + mi_assert_internal(bit_count <= MI_BITMAP_MAX_BIT_COUNT); + mi_assert_internal(bit_count > 0); + const size_t chunk_count = bit_count / MI_BCHUNK_BITS; + mi_assert_internal(chunk_count >= 1); + const size_t size = offsetof(mi_bitmap_t,chunks) + (chunk_count * MI_BCHUNK_SIZE); + mi_assert_internal( (size%MI_BCHUNK_SIZE) == 0 ); + if (pchunk_count != NULL) { *pchunk_count = chunk_count; } + return size; +} + + +// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true +// returns the size of the bitmap +size_t mi_bitmap_init(mi_bitmap_t* bitmap, size_t bit_count, bool already_zero) { + size_t chunk_count; + const size_t size = mi_bitmap_size(bit_count, &chunk_count); + if (!already_zero) { + _mi_memzero_aligned(bitmap, size); + } + mi_atomic_store_release(&bitmap->chunk_count, chunk_count); + mi_assert_internal(mi_atomic_load_relaxed(&bitmap->chunk_count) <= MI_BITMAP_MAX_CHUNK_COUNT); + return size; +} + + +// Set a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread. +static void mi_bchunks_unsafe_setN(mi_bchunk_t* chunks, mi_bchunkmap_t* cmap, size_t idx, size_t n) { + mi_assert_internal(n>0); + + // start chunk and index + size_t chunk_idx = idx / MI_BCHUNK_BITS; + const size_t cidx = idx % MI_BCHUNK_BITS; + const size_t ccount = _mi_divide_up(n, MI_BCHUNK_BITS); + + // first update the chunkmap + mi_bchunk_setN(cmap, chunk_idx, ccount, NULL); + + // first chunk + size_t m = MI_BCHUNK_BITS - cidx; + if (m > n) { m = n; } + mi_bchunk_setN(&chunks[chunk_idx], cidx, m, NULL); + + // n can be large so use memset for efficiency for all in-between chunks + chunk_idx++; + n -= m; + const size_t mid_chunks = n / MI_BCHUNK_BITS; + if (mid_chunks > 0) { + _mi_memset(&chunks[chunk_idx], ~0, mid_chunks * MI_BCHUNK_SIZE); + chunk_idx += mid_chunks; + n -= (mid_chunks * MI_BCHUNK_BITS); + } + + // last chunk + if (n > 0) { + mi_assert_internal(n < MI_BCHUNK_BITS); + mi_assert_internal(chunk_idx < MI_BCHUNK_FIELDS); + mi_bchunk_setN(&chunks[chunk_idx], 0, n, NULL); + } +} + +// Set a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread. +void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) { + mi_assert_internal(n>0); + mi_assert_internal(idx + n <= mi_bitmap_max_bits(bitmap)); + mi_bchunks_unsafe_setN(&bitmap->chunks[0], &bitmap->chunkmap, idx, n); +} + + + + +// ------- mi_bitmap_xset --------------------------------------- + +// Set a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's). 
+bool mi_bitmap_setN(mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* palready_set) { + mi_assert_internal(n>0); + const size_t maxbits = mi_bitmap_max_bits(bitmap); + mi_assert_internal(idx + n <= maxbits); + if (idx+n > maxbits) { // paranoia + if (idx >= maxbits) return false; + n = maxbits - idx; + } + + // iterate through the chunks + size_t chunk_idx = idx / MI_BCHUNK_BITS; + size_t cidx = idx % MI_BCHUNK_BITS; + bool were_allclear = true; + size_t already_set = 0; + while (n > 0) { + const size_t m = (cidx + n > MI_BCHUNK_BITS ? MI_BCHUNK_BITS - cidx : n); + size_t _already_set = 0; + were_allclear = mi_bchunk_setN(&bitmap->chunks[chunk_idx], cidx, m, &_already_set) && were_allclear; + already_set += _already_set; + mi_bitmap_chunkmap_set(bitmap, chunk_idx); // set afterwards + mi_assert_internal(m <= n); + n -= m; + cidx = 0; + chunk_idx++; + } + if (palready_set != NULL) { *palready_set = already_set; } + return were_allclear; +} + +// Clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 1's to 0's. +bool mi_bitmap_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n) { + mi_assert_internal(n>0); + const size_t maxbits = mi_bitmap_max_bits(bitmap); + mi_assert_internal(idx + n <= maxbits); + if (idx+n > maxbits) { // paranoia + if (idx >= maxbits) return false; + n = maxbits - idx; + } + + // iterate through the chunks + size_t chunk_idx = idx / MI_BCHUNK_BITS; + size_t cidx = idx % MI_BCHUNK_BITS; + bool were_allset = true; + while (n > 0) { + const size_t m = (cidx + n > MI_BCHUNK_BITS ? MI_BCHUNK_BITS - cidx : n); + bool maybe_all_clear = false; + were_allset = mi_bchunk_clearN(&bitmap->chunks[chunk_idx], cidx, m, &maybe_all_clear) && were_allset; + if (maybe_all_clear) { mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx); } + mi_assert_internal(m <= n); + n -= m; + cidx = 0; + chunk_idx++; + } + return were_allset; +} + +// Count bits set in a range of `n` bits. +size_t mi_bitmap_popcountN( mi_bitmap_t* bitmap, size_t idx, size_t n) { + mi_assert_internal(n>0); + const size_t maxbits = mi_bitmap_max_bits(bitmap); + mi_assert_internal(idx + n <= maxbits); + if (idx+n > maxbits) { // paranoia + if (idx >= maxbits) return 0; + n = maxbits - idx; + } + + // iterate through the chunks + size_t chunk_idx = idx / MI_BCHUNK_BITS; + size_t cidx = idx % MI_BCHUNK_BITS; + size_t popcount = 0; + while (n > 0) { + const size_t m = (cidx + n > MI_BCHUNK_BITS ? MI_BCHUNK_BITS - cidx : n); + popcount += mi_bchunk_popcountN(&bitmap->chunks[chunk_idx], cidx, m); + mi_assert_internal(m <= n); + n -= m; + cidx = 0; + chunk_idx++; + } + return popcount; +} + + +// Set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0) +bool mi_bitmap_set(mi_bitmap_t* bitmap, size_t idx) { + return mi_bitmap_setN(bitmap, idx, 1, NULL); +} + +bool mi_bitmap_clear(mi_bitmap_t* bitmap, size_t idx) { + return mi_bitmap_clearN(bitmap, idx, 1); +} + + + +// ------- mi_bitmap_is_xset --------------------------------------- + +// Is a sequence of n bits already all set/cleared? 
+bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) { + mi_assert_internal(n>0); + const size_t maxbits = mi_bitmap_max_bits(bitmap); + mi_assert_internal(idx + n <= maxbits); + if (idx+n > maxbits) { // paranoia + if (idx >= maxbits) return false; + n = maxbits - idx; + } + + // iterate through the chunks + size_t chunk_idx = idx / MI_BCHUNK_BITS; + size_t cidx = idx % MI_BCHUNK_BITS; + bool xset = true; + while (n > 0 && xset) { + const size_t m = (cidx + n > MI_BCHUNK_BITS ? MI_BCHUNK_BITS - cidx : n); + xset = mi_bchunk_is_xsetN(set, &bitmap->chunks[chunk_idx], cidx, m) && xset; + mi_assert_internal(m <= n); + n -= m; + cidx = 0; + chunk_idx++; + } + return xset; +} + +bool mi_bitmap_is_all_clear(mi_bitmap_t* bitmap) { + return mi_bitmap_is_xsetN(MI_BIT_CLEAR, bitmap, 0, mi_bitmap_max_bits(bitmap)); +} + +/* -------------------------------------------------------------------------------- + Iterate through a bfield +-------------------------------------------------------------------------------- */ + +// Cycle iteration through a bitfield. This is used to space out threads +// so there is less chance of contention. When searching for a free page we +// like to first search only the accessed part (so we reuse better). This +// high point is called the `cycle`. +// +// We then iterate through the bitfield as: +// first: [start, cycle> +// then : [0, start> +// then : [cycle, MI_BFIELD_BITS> +// +// The start is determined usually as `tseq % cycle` to have each thread +// start at a different spot. +// - We use `popcount` to improve branch prediction (maybe not needed? can we simplify?) +// - The `cycle_mask` is the part `[start, cycle>`. +#define mi_bfield_iterate(bfield,start,cycle,name_idx,SUF) { \ + mi_assert_internal(start <= cycle); \ + mi_assert_internal(start < MI_BFIELD_BITS); \ + mi_assert_internal(cycle <= MI_BFIELD_BITS); \ + mi_bfield_t _cycle_mask##SUF = mi_bfield_mask(cycle - start, start); \ + size_t _bcount##SUF = mi_bfield_popcount(bfield); \ + mi_bfield_t _b##SUF = bfield & _cycle_mask##SUF; /* process [start, cycle> first*/\ + while(_bcount##SUF > 0) { \ + _bcount##SUF--;\ + if (_b##SUF==0) { _b##SUF = bfield & ~_cycle_mask##SUF; } /* process [0,start> + [cycle, MI_BFIELD_BITS> next */ \ + /* size_t name_idx; */ \ + bool _found##SUF = mi_bfield_find_least_bit(_b##SUF,&name_idx); \ + mi_assert_internal(_found##SUF); MI_UNUSED(_found##SUF); \ + { \ + +#define mi_bfield_iterate_end(SUF) \ + } \ + _b##SUF = mi_bfield_clear_least_bit(_b##SUF); \ + } \ +} + +#define mi_bfield_cycle_iterate(bfield,tseq,cycle,name_idx,SUF) { \ + const size_t _start##SUF = (uint32_t)(tseq) % (uint32_t)(cycle); /* or: 0 to always search from the start? */\ + mi_bfield_iterate(bfield,_start##SUF,cycle,name_idx,SUF) + +#define mi_bfield_cycle_iterate_end(SUF) \ + mi_bfield_iterate_end(SUF); } + + +/* -------------------------------------------------------------------------------- + mi_bitmap_find + (used to find free pages) +-------------------------------------------------------------------------------- */ + +typedef bool (mi_bitmap_visit_fun_t)(mi_bitmap_t* bitmap, size_t chunk_idx, size_t n, size_t* idx, void* arg1, void* arg2); + +// Go through the bitmap and for every sequence of `n` set bits, call the visitor function. +// If it returns `true` stop the search. 
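
[Editor's note — illustration only, not part of the imported sources or this patch. The cycle-iteration macros above visit the set bits in `[start, cycle)` first and only then the remainder, so threads with different `tseq` start in different places. A plain-loop sketch of that visiting order; `cycle_iterate` is my name and the macro's popcount/mask bookkeeping is omitted.]

```
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* visit set bits of `bfield` in [start,cycle), then [0,start), then [cycle,64) */
static void cycle_iterate(uint64_t bfield, unsigned start, unsigned cycle) {
  assert(start <= cycle && cycle <= 64);
  for (unsigned i = start; i < cycle; i++) {
    if (bfield & (UINT64_C(1) << i)) printf("%u ", i);
  }
  for (unsigned i = 0; i < start; i++) {
    if (bfield & (UINT64_C(1) << i)) printf("%u ", i);
  }
  for (unsigned i = cycle; i < 64; i++) {
    if (bfield & (UINT64_C(1) << i)) printf("%u ", i);
  }
  printf("\n");
}

int main(void) {
  cycle_iterate(0x24A, 4, 8);   /* bits 1,3,6,9 set; prints: 6 1 3 9 */
  return 0;
}
```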
+static inline bool mi_bitmap_find(mi_bitmap_t* bitmap, size_t tseq, size_t n, size_t* pidx, mi_bitmap_visit_fun_t* on_find, void* arg1, void* arg2) +{ + const size_t chunkmap_max = _mi_divide_up(mi_bitmap_chunk_count(bitmap), MI_BFIELD_BITS); + for (size_t i = 0; i < chunkmap_max; i++) { + // and for each chunkmap entry we iterate over its bits to find the chunks + const mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bitmap->chunkmap.bfields[i]); + size_t hi; + if (mi_bfield_find_highest_bit(cmap_entry, &hi)) { + size_t eidx = 0; + mi_bfield_cycle_iterate(cmap_entry, tseq%8, hi+1, eidx, Y) // reduce the tseq to 8 bins to reduce using extra memory (see `mstress`) + { + mi_assert_internal(eidx <= MI_BFIELD_BITS); + const size_t chunk_idx = i*MI_BFIELD_BITS + eidx; + mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap)); + if ((*on_find)(bitmap, chunk_idx, n, pidx, arg1, arg2)) { + return true; + } + } + mi_bfield_cycle_iterate_end(Y); + } + } + return false; +} + + +/* -------------------------------------------------------------------------------- + Bitmap: try_find_and_claim -- used to allocate abandoned pages + note: the compiler will fully inline the indirect function call +-------------------------------------------------------------------------------- */ + +typedef struct mi_claim_fun_data_s { + mi_arena_t* arena; +} mi_claim_fun_data_t; + +static bool mi_bitmap_try_find_and_claim_visit(mi_bitmap_t* bitmap, size_t chunk_idx, size_t n, size_t* pidx, void* arg1, void* arg2) +{ + mi_assert_internal(n==1); MI_UNUSED(n); + mi_claim_fun_t* claim_fun = (mi_claim_fun_t*)arg1; + mi_claim_fun_data_t* claim_data = (mi_claim_fun_data_t*)arg2; + size_t cidx; + if mi_likely(mi_bchunk_try_find_and_clear(&bitmap->chunks[chunk_idx], &cidx)) { + const size_t slice_index = (chunk_idx * MI_BCHUNK_BITS) + cidx; + mi_assert_internal(slice_index < mi_bitmap_max_bits(bitmap)); + bool keep_set = true; + if ((*claim_fun)(slice_index, claim_data->arena, &keep_set)) { + // success! + mi_assert_internal(!keep_set); + *pidx = slice_index; + return true; + } + else { + // failed to claim it, set abandoned mapping again (unless the page was freed) + if (keep_set) { + const bool wasclear = mi_bchunk_set(&bitmap->chunks[chunk_idx], cidx, NULL); + mi_assert_internal(wasclear); MI_UNUSED(wasclear); + } + } + } + else { + // we may find that all are cleared only on a second iteration but that is ok as + // the chunkmap is a conservative approximation. + mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx); + } + return false; +} + +// Find a set bit in the bitmap and try to atomically clear it and claim it. +// (Used to find pages in the pages_abandoned bitmaps.) 
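
[Editor's note — illustration only, not part of the imported sources or this patch. The claim-visit path above clears the abandoned-map bit first and only re-sets it when the claim callback fails with `keep_set`. A minimal standalone sketch of that optimistic take-then-rollback pattern with C11 atomics; `try_claim_bit` and its callback type are illustrative names, not mimalloc API.]

```
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef bool (claim_fun_t)(size_t idx, bool* keep_set);

/* clear bit `idx` optimistically, ask the callback to claim it, and if the
   claim fails with keep_set, set the bit back so others can still find it */
static bool try_claim_bit(_Atomic(uint64_t)* b, size_t idx, claim_fun_t* claim) {
  const uint64_t mask = UINT64_C(1) << idx;
  const uint64_t old = atomic_fetch_and_explicit(b, ~mask, memory_order_acq_rel);
  if ((old & mask) == 0) return false;       /* was not set: nothing to claim */
  bool keep_set = true;
  if (claim(idx, &keep_set)) return true;    /* claimed */
  if (keep_set) {                            /* roll back the clear */
    atomic_fetch_or_explicit(b, mask, memory_order_acq_rel);
  }
  return false;
}

static bool claim_even_only(size_t idx, bool* keep_set) {
  *keep_set = true;                          /* on failure, leave the bit set */
  return ((idx % 2) == 0);
}

int main(void) {
  _Atomic(uint64_t) b = UINT64_C(0x28);                     /* bits 3 and 5 set */
  assert(!try_claim_bit(&b, 3, &claim_even_only));          /* claim fails, bit restored */
  assert(atomic_load(&b) == UINT64_C(0x28));
  assert(!try_claim_bit(&b, 6, &claim_even_only));          /* bit 6 was never set */
  return 0;
}
```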
+mi_decl_nodiscard bool mi_bitmap_try_find_and_claim(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx, + mi_claim_fun_t* claim, mi_arena_t* arena ) +{ + mi_claim_fun_data_t claim_data = { arena }; + return mi_bitmap_find(bitmap, tseq, 1, pidx, &mi_bitmap_try_find_and_claim_visit, (void*)claim, &claim_data); +} + + +bool mi_bitmap_bsr(mi_bitmap_t* bitmap, size_t* idx) { + const size_t chunkmap_max = _mi_divide_up(mi_bitmap_chunk_count(bitmap), MI_BFIELD_BITS); + for (size_t i = chunkmap_max; i > 0; ) { + i--; + mi_bfield_t cmap = mi_atomic_load_relaxed(&bitmap->chunkmap.bfields[i]); + size_t cmap_idx; + if (mi_bsr(cmap,&cmap_idx)) { + // highest chunk + const size_t chunk_idx = i*MI_BFIELD_BITS + cmap_idx; + size_t cidx; + if (mi_bchunk_bsr(&bitmap->chunks[chunk_idx], &cidx)) { + *idx = (chunk_idx * MI_BCHUNK_BITS) + cidx; + return true; + } + } + } + return false; +} + +// Return count of all set bits in a bitmap. +size_t mi_bitmap_popcount(mi_bitmap_t* bitmap) { + // for all chunkmap entries + size_t popcount = 0; + const size_t chunkmap_max = _mi_divide_up(mi_bitmap_chunk_count(bitmap), MI_BFIELD_BITS); + for (size_t i = 0; i < chunkmap_max; i++) { + mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bitmap->chunkmap.bfields[i]); + size_t cmap_idx; + // for each chunk (corresponding to a set bit in a chunkmap entry) + while (mi_bfield_foreach_bit(&cmap_entry, &cmap_idx)) { + const size_t chunk_idx = i*MI_BFIELD_BITS + cmap_idx; + // count bits in a chunk + popcount += mi_bchunk_popcount(&bitmap->chunks[chunk_idx]); + } + } + return popcount; +} + + + +// Clear a bit once it is set. +void mi_bitmap_clear_once_set(mi_bitmap_t* bitmap, size_t idx) { + mi_assert_internal(idx < mi_bitmap_max_bits(bitmap)); + const size_t chunk_idx = idx / MI_BCHUNK_BITS; + const size_t cidx = idx % MI_BCHUNK_BITS; + mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap)); + mi_bchunk_clear_once_set(&bitmap->chunks[chunk_idx], cidx); +} + + +// Visit all set bits in a bitmap. +// todo: optimize further? maybe use avx512 to directly get all indices using a mask_compressstore? +bool _mi_bitmap_forall_set(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg) { + // for all chunkmap entries + const size_t chunkmap_max = _mi_divide_up(mi_bitmap_chunk_count(bitmap), MI_BFIELD_BITS); + for(size_t i = 0; i < chunkmap_max; i++) { + mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bitmap->chunkmap.bfields[i]); + size_t cmap_idx; + // for each chunk (corresponding to a set bit in a chunkmap entry) + while (mi_bfield_foreach_bit(&cmap_entry, &cmap_idx)) { + const size_t chunk_idx = i*MI_BFIELD_BITS + cmap_idx; + // for each chunk field + mi_bchunk_t* const chunk = &bitmap->chunks[chunk_idx]; + for (size_t j = 0; j < MI_BCHUNK_FIELDS; j++) { + const size_t base_idx = (chunk_idx*MI_BCHUNK_BITS) + (j*MI_BFIELD_BITS); + mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[j]); + size_t bidx; + while (mi_bfield_foreach_bit(&b, &bidx)) { + const size_t idx = base_idx + bidx; + if (!visit(idx, 1, arena, arg)) return false; + } + } + } + } + return true; +} + +// Visit all set bits in a bitmap but try to return ranges (within bfields) if possible. +// Also clear those ranges atomically. +// Used by purging to purge larger ranges when possible +// todo: optimize further? maybe use avx512 to directly get all indices using a mask_compressstore? 
+bool _mi_bitmap_forall_setc_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg) { + // for all chunkmap entries + const size_t chunkmap_max = _mi_divide_up(mi_bitmap_chunk_count(bitmap), MI_BFIELD_BITS); + for (size_t i = 0; i < chunkmap_max; i++) { + mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bitmap->chunkmap.bfields[i]); + size_t cmap_idx; + // for each chunk (corresponding to a set bit in a chunkmap entry) + while (mi_bfield_foreach_bit(&cmap_entry, &cmap_idx)) { + const size_t chunk_idx = i*MI_BFIELD_BITS + cmap_idx; + // for each chunk field + mi_bchunk_t* const chunk = &bitmap->chunks[chunk_idx]; + for (size_t j = 0; j < MI_BCHUNK_FIELDS; j++) { + const size_t base_idx = (chunk_idx*MI_BCHUNK_BITS) + (j*MI_BFIELD_BITS); + mi_bfield_t b = mi_atomic_exchange_relaxed(&chunk->bfields[j], 0); +#if MI_DEBUG > 1 + const size_t bpopcount = mi_popcount(b); + size_t rngcount = 0; +#endif + size_t bidx; + while (mi_bfield_find_least_bit(b, &bidx)) { + size_t rng = mi_ctz(~(b>>bidx)); // all the set bits from bidx +#if MI_DEBUG > 1 + rngcount += rng; +#endif + const size_t idx = base_idx + bidx; + mi_assert_internal(rng>=1 && rng<=MI_BFIELD_BITS); + mi_assert_internal((idx % MI_BFIELD_BITS) + rng <= MI_BFIELD_BITS); + mi_assert_internal((idx / MI_BCHUNK_BITS) < mi_bitmap_chunk_count(bitmap)); + if (!visit(idx, rng, arena, arg)) return false; + // clear rng bits in b + b = b & ~mi_bfield_mask(rng, bidx); + } + mi_assert_internal(rngcount == bpopcount); + } + } + } + return true; +} + +// Visit all set bits in a bitmap but try to return ranges (within bfields) if possible, +// but only in chunks of at least `rngslices` slices (that are also aligned at `rngslices`). +// and clear those ranges atomically. +// Used by purging to purge larger ranges when possible. With transparent huge pages we only +// want to purge whole huge pages (2 MiB) at a time which is what the `rngslices` parameter achieves. 
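+// [editorial sketch -- not part of the upstream mimalloc 3.2.6 sources] It isolates the
+// run-extraction trick used by `_mi_bitmap_forall_setc_ranges` above: starting at the lowest set
+// bit `bidx`, the length of the run of consecutive 1-bits is `ctz(~(b >> bidx))`. Uses GCC/Clang
+// builtins on a plain 64-bit word; the helper name is hypothetical.
+#include <stdint.h>
+#include <stddef.h>
+static inline void ex_visit_set_runs(uint64_t b, void (*visit)(size_t start, size_t len, void* arg), void* arg) {
+  while (b != 0) {
+    const size_t   bidx    = (size_t)__builtin_ctzll(b);    // first set bit
+    const uint64_t shifted = ~(b >> bidx);                   // invert: the run of 1s becomes a run of 0s
+    const size_t   run     = (shifted == 0 ? 64 - bidx : (size_t)__builtin_ctzll(shifted));  // run length
+    visit(bidx, run, arg);
+    if (bidx + run >= 64) return;                            // the run reached the top bit
+    b &= ~(((UINT64_C(1) << run) - 1) << bidx);              // clear the run and continue
+  }
+}
+// e.g. b = 0b111000 yields one call: visit(start=3, len=3, ...).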
+bool _mi_bitmap_forall_setc_rangesn(mi_bitmap_t* bitmap, size_t rngslices, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg) +{ + // use the generic routine for `rngslices<=1` (as that one finds longest ranges at a time) + if (rngslices<=1) { + return _mi_bitmap_forall_setc_ranges(bitmap, visit, arena, arg); + } + mi_assert_internal(rngslices <= MI_BFIELD_BITS); + if (rngslices > MI_BFIELD_BITS) { rngslices = MI_BFIELD_BITS; } + + // for all chunkmap entries + const size_t chunkmap_max = _mi_divide_up(mi_bitmap_chunk_count(bitmap), MI_BFIELD_BITS); + for (size_t i = 0; i < chunkmap_max; i++) { + mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bitmap->chunkmap.bfields[i]); + size_t cmap_idx; + // for each chunk (corresponding to a set bit in a chunkmap entry) + while (mi_bfield_foreach_bit(&cmap_entry, &cmap_idx)) { + const size_t chunk_idx = i*MI_BFIELD_BITS + cmap_idx; + // for each chunk field + mi_bchunk_t* const chunk = &bitmap->chunks[chunk_idx]; + for (size_t j = 0; j < MI_BCHUNK_FIELDS; j++) { + const size_t base_idx = (chunk_idx*MI_BCHUNK_BITS) + (j*MI_BFIELD_BITS); + mi_bfield_t b = mi_atomic_exchange_relaxed(&chunk->bfields[j], 0); + mi_bfield_t skipped = 0; + mi_bfield_t rngmask = mi_bfield_mask(rngslices, 0); + do { + if ((b & rngmask) == rngmask) { + const size_t idx = base_idx + mi_ctz(rngmask); + if (!visit(idx, rngslices, arena, arg)) { + // break early + if (skipped != 0) { + mi_atomic_or_relaxed(&chunk->bfields[j], skipped); + return false; + } + } + } + else { + skipped = skipped | (b & rngmask); + } + rngmask <<= rngslices; + } while (rngmask != 0); + + if (skipped != 0) { + mi_atomic_or_relaxed(&chunk->bfields[j], skipped); + } + } + } + } + return true; +} + + +/* -------------------------------------------------------------------------------- + binned bitmap's +-------------------------------------------------------------------------------- */ + + +size_t mi_bbitmap_size(size_t bit_count, size_t* pchunk_count) { + // mi_assert_internal((bit_count % MI_BCHUNK_BITS) == 0); + bit_count = _mi_align_up(bit_count, MI_BCHUNK_BITS); + mi_assert_internal(bit_count <= MI_BITMAP_MAX_BIT_COUNT); + mi_assert_internal(bit_count > 0); + const size_t chunk_count = bit_count / MI_BCHUNK_BITS; + mi_assert_internal(chunk_count >= 1); + const size_t size = offsetof(mi_bbitmap_t,chunks) + (chunk_count * MI_BCHUNK_SIZE); + mi_assert_internal( (size%MI_BCHUNK_SIZE) == 0 ); + if (pchunk_count != NULL) { *pchunk_count = chunk_count; } + return size; +} + +// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true +// returns the size of the bitmap +size_t mi_bbitmap_init(mi_bbitmap_t* bbitmap, size_t bit_count, bool already_zero) { + size_t chunk_count; + const size_t size = mi_bbitmap_size(bit_count, &chunk_count); + if (!already_zero) { + _mi_memzero_aligned(bbitmap, size); + } + mi_atomic_store_release(&bbitmap->chunk_count, chunk_count); + mi_assert_internal(mi_atomic_load_relaxed(&bbitmap->chunk_count) <= MI_BITMAP_MAX_CHUNK_COUNT); + return size; +} + +void mi_bbitmap_unsafe_setN(mi_bbitmap_t* bbitmap, size_t idx, size_t n) { + mi_assert_internal(n>0); + mi_assert_internal(idx + n <= mi_bbitmap_max_bits(bbitmap)); + mi_bchunks_unsafe_setN(&bbitmap->chunks[0], &bbitmap->chunkmap, idx, n); +} + + + +/* -------------------------------------------------------------------------------- + binned bitmap used to track free slices +-------------------------------------------------------------------------------- */ + +// Assign a specific size bin to a chunk +static 
void mi_bbitmap_set_chunk_bin(mi_bbitmap_t* bbitmap, size_t chunk_idx, mi_chunkbin_t bin) { + mi_assert_internal(chunk_idx < mi_bbitmap_chunk_count(bbitmap)); + for (mi_chunkbin_t ibin = MI_CBIN_SMALL; ibin < MI_CBIN_NONE; ibin = mi_chunkbin_inc(ibin)) { + if (ibin == bin) { + const bool was_clear = mi_bchunk_set(& bbitmap->chunkmap_bins[ibin], chunk_idx, NULL); + if (was_clear) { mi_os_stat_increase(chunk_bins[ibin],1); } + } + else { + const bool was_set = mi_bchunk_clear(&bbitmap->chunkmap_bins[ibin], chunk_idx, NULL); + if (was_set) { mi_os_stat_decrease(chunk_bins[ibin],1); } + } + } +} + +mi_chunkbin_t mi_bbitmap_debug_get_bin(const mi_bchunkmap_t* chunkmap_bins, size_t chunk_idx) { + for (mi_chunkbin_t ibin = MI_CBIN_SMALL; ibin < MI_CBIN_NONE; ibin = mi_chunkbin_inc(ibin)) { + if (mi_bchunk_is_xsetN(MI_BIT_SET, &chunkmap_bins[ibin], chunk_idx, 1)) { + return ibin; + } + } + return MI_CBIN_NONE; +} + +// Track the index of the highest chunk that is accessed. +static void mi_bbitmap_chunkmap_set_max(mi_bbitmap_t* bbitmap, size_t chunk_idx) { + size_t oldmax = mi_atomic_load_relaxed(&bbitmap->chunk_max_accessed); + if mi_unlikely(chunk_idx > oldmax) { + mi_atomic_cas_strong_relaxed(&bbitmap->chunk_max_accessed, &oldmax, chunk_idx); + } +} + +// Set a bit in the chunkmap +static void mi_bbitmap_chunkmap_set(mi_bbitmap_t* bbitmap, size_t chunk_idx, bool check_all_set) { + mi_assert(chunk_idx < mi_bbitmap_chunk_count(bbitmap)); + if (check_all_set) { + if (mi_bchunk_all_are_set_relaxed(&bbitmap->chunks[chunk_idx])) { + // all slices are free in this chunk: return back to the NONE bin + mi_bbitmap_set_chunk_bin(bbitmap, chunk_idx, MI_CBIN_NONE); + } + } + mi_bchunk_set(&bbitmap->chunkmap, chunk_idx, NULL); + mi_bbitmap_chunkmap_set_max(bbitmap, chunk_idx); +} + +static bool mi_bbitmap_chunkmap_try_clear(mi_bbitmap_t* bbitmap, size_t chunk_idx) { + mi_assert(chunk_idx < mi_bbitmap_chunk_count(bbitmap)); + // check if the corresponding chunk is all clear + if (!mi_bchunk_all_are_clear_relaxed(&bbitmap->chunks[chunk_idx])) return false; + // clear the chunkmap bit + mi_bchunk_clear(&bbitmap->chunkmap, chunk_idx, NULL); + // .. but a concurrent set may have happened in between our all-clear test and the clearing of the + // bit in the mask. We check again to catch this situation. (note: mi_bchunk_clear must be acq-rel) + if (!mi_bchunk_all_are_clear_relaxed(&bbitmap->chunks[chunk_idx])) { + mi_bchunk_set(&bbitmap->chunkmap, chunk_idx, NULL); + return false; + } + mi_bbitmap_chunkmap_set_max(bbitmap, chunk_idx); + return true; +} + + +/* -------------------------------------------------------------------------------- + mi_bbitmap_setN, try_clearN, and is_xsetN + (used to find free pages) +-------------------------------------------------------------------------------- */ + +// Set a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's). +bool mi_bbitmap_setN(mi_bbitmap_t* bbitmap, size_t idx, size_t n) { + mi_assert_internal(n>0); + const size_t maxbits = mi_bbitmap_max_bits(bbitmap); + mi_assert_internal(idx + n <= maxbits); + if (idx+n > maxbits) { // paranoia + if (idx >= maxbits) return false; + n = maxbits - idx; + } + + // iterate through the chunks + size_t chunk_idx = idx / MI_BCHUNK_BITS; + size_t cidx = idx % MI_BCHUNK_BITS; + bool were_allclear = true; + while (n > 0) { + const size_t m = (cidx + n > MI_BCHUNK_BITS ? 
MI_BCHUNK_BITS - cidx : n); + were_allclear = mi_bchunk_setN(&bbitmap->chunks[chunk_idx], cidx, m, NULL) && were_allclear; + mi_bbitmap_chunkmap_set(bbitmap, chunk_idx, true); // set afterwards + mi_assert_internal(m <= n); + n -= m; + cidx = 0; + chunk_idx++; + } + return were_allclear; +} + +// ------- mi_bbitmap_try_clearNC --------------------------------------- + +// Try to clear `n` bits at `idx` where `n <= MI_BCHUNK_BITS`. +bool mi_bbitmap_try_clearNC(mi_bbitmap_t* bbitmap, size_t idx, size_t n) { + mi_assert_internal(n>0); + mi_assert_internal(n<=MI_BCHUNK_BITS); + mi_assert_internal(idx + n <= mi_bbitmap_max_bits(bbitmap)); + + const size_t chunk_idx = idx / MI_BCHUNK_BITS; + const size_t cidx = idx % MI_BCHUNK_BITS; + mi_assert_internal(cidx + n <= MI_BCHUNK_BITS); // don't cross chunks (for now) + mi_assert_internal(chunk_idx < mi_bbitmap_chunk_count(bbitmap)); + if (cidx + n > MI_BCHUNK_BITS) return false; + bool maybe_all_clear = false; + const bool cleared = mi_bchunk_try_clearN(&bbitmap->chunks[chunk_idx], cidx, n, &maybe_all_clear); + if (cleared && maybe_all_clear) { mi_bbitmap_chunkmap_try_clear(bbitmap, chunk_idx); } + // note: we don't set the size class for an explicit try_clearN (only used by purging) + return cleared; +} + + + +// ------- mi_bbitmap_is_xset --------------------------------------- + +// Is a sequence of n bits already all set/cleared? +bool mi_bbitmap_is_xsetN(mi_xset_t set, mi_bbitmap_t* bbitmap, size_t idx, size_t n) { + mi_assert_internal(n>0); + const size_t maxbits = mi_bbitmap_max_bits(bbitmap); + mi_assert_internal(idx + n <= maxbits); + if (idx+n > maxbits) { // paranoia + if (idx >= maxbits) return false; + n = maxbits - idx; + } + + // iterate through the chunks + size_t chunk_idx = idx / MI_BCHUNK_BITS; + size_t cidx = idx % MI_BCHUNK_BITS; + bool xset = true; + while (n > 0 && xset) { + const size_t m = (cidx + n > MI_BCHUNK_BITS ? MI_BCHUNK_BITS - cidx : n); + xset = mi_bchunk_is_xsetN(set, &bbitmap->chunks[chunk_idx], cidx, m) && xset; + mi_assert_internal(m <= n); + n -= m; + cidx = 0; + chunk_idx++; + } + return xset; +} + + + + +/* -------------------------------------------------------------------------------- + mi_bbitmap_find + (used to find free pages) +-------------------------------------------------------------------------------- */ + +typedef bool (mi_bchunk_try_find_and_clear_fun_t)(mi_bchunk_t* chunk, size_t n, size_t* idx); + +// Go through the bbitmap and for every sequence of `n` set bits, call the visitor function. +// If it returns `true` stop the search. +// +// This is used for finding free blocks and it is important to be efficient (with 2-level bitscan) +// but also reduce fragmentation (through size bins). 
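+// [editorial sketch -- not part of the upstream mimalloc 3.2.6 sources] It isolates the
+// chunk-splitting loop that `mi_bbitmap_setN` and `mi_bbitmap_is_xsetN` above share: a run of `n`
+// bits starting at `idx` is handled chunk by chunk, each step covering at most the bits left in
+// the current chunk. The names and the 512-bit chunk size are assumed stand-ins.
+#include <stddef.h>
+static inline void ex_for_each_chunk_span(size_t idx, size_t n,
+                                          void (*body)(size_t chunk_idx, size_t in_chunk_idx, size_t count, void* arg),
+                                          void* arg)
+{
+  const size_t chunk_bits = 512;             // assumed bits per chunk
+  size_t chunk_idx = idx / chunk_bits;
+  size_t cidx      = idx % chunk_bits;
+  while (n > 0) {
+    const size_t m = (cidx + n > chunk_bits ? chunk_bits - cidx : n);  // bits that fit in this chunk
+    body(chunk_idx, cidx, m, arg);
+    n -= m;
+    cidx = 0;                                // subsequent chunks start at their first bit
+    chunk_idx++;
+  }
+}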
+static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap, size_t tseq, size_t n, size_t* pidx, mi_bchunk_try_find_and_clear_fun_t* on_find) +{ + // we space out threads to reduce contention + const size_t cmap_max_count = _mi_divide_up(mi_bbitmap_chunk_count(bbitmap),MI_BFIELD_BITS); + const size_t chunk_acc = mi_atomic_load_relaxed(&bbitmap->chunk_max_accessed); + const size_t cmap_acc = chunk_acc / MI_BFIELD_BITS; + const size_t cmap_acc_bits = 1 + (chunk_acc % MI_BFIELD_BITS); + + // create a mask over the chunkmap entries to iterate over them efficiently + mi_assert_internal(MI_BFIELD_BITS >= MI_BCHUNK_FIELDS); + const mi_bfield_t cmap_mask = mi_bfield_mask(cmap_max_count,0); + const size_t cmap_cycle = cmap_acc+1; + const mi_chunkbin_t bbin = mi_chunkbin_of(n); + // visit each cmap entry + size_t cmap_idx = 0; + mi_bfield_cycle_iterate(cmap_mask, tseq, cmap_cycle, cmap_idx, X) + { + // and for each chunkmap entry we iterate over its bits to find the chunks + const mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bbitmap->chunkmap.bfields[cmap_idx]); + const size_t cmap_entry_cycle = (cmap_idx != cmap_acc ? MI_BFIELD_BITS : cmap_acc_bits); + if (cmap_entry == 0) continue; + + // get size bin masks + mi_bfield_t cmap_bins[MI_CBIN_COUNT] = { 0 }; + cmap_bins[MI_CBIN_NONE] = cmap_entry; + for (mi_chunkbin_t ibin = MI_CBIN_SMALL; ibin < MI_CBIN_NONE; ibin = mi_chunkbin_inc(ibin)) { + const mi_bfield_t cmap_bin = mi_atomic_load_relaxed(&bbitmap->chunkmap_bins[ibin].bfields[cmap_idx]); + cmap_bins[ibin] = cmap_bin & cmap_entry; + cmap_bins[MI_CBIN_NONE] &= ~cmap_bin; // clear bits that are in an assigned size bin + } + + // consider only chunks for a particular size bin at a time + // this picks the best bin only within a cmap entry (~ 1GiB address space), but avoids multiple + // iterations through all entries. + mi_assert_internal(bbin < MI_CBIN_NONE); + for (mi_chunkbin_t ibin = MI_CBIN_SMALL; ibin <= MI_CBIN_NONE; + // skip from bbin to NONE (so, say, a SMALL will never be placed in a OTHER, MEDIUM, or LARGE chunk to reduce fragmentation) + ibin = (ibin == bbin ? MI_CBIN_NONE : mi_chunkbin_inc(ibin))) + { + mi_assert_internal(ibin < MI_CBIN_COUNT); + const mi_bfield_t cmap_bin = cmap_bins[ibin]; + size_t eidx = 0; + mi_bfield_cycle_iterate(cmap_bin, tseq, cmap_entry_cycle, eidx, Y) + { + // assertion doesn't quite hold as the max_accessed may be out-of-date + // mi_assert_internal(cmap_entry_cycle > eidx || ibin == MI_CBIN_NONE); + + // get the chunk + const size_t chunk_idx = cmap_idx*MI_BFIELD_BITS + eidx; + mi_bchunk_t* chunk = &bbitmap->chunks[chunk_idx]; + + size_t cidx; + if ((*on_find)(chunk, n, &cidx)) { + if (cidx==0 && ibin == MI_CBIN_NONE) { // only the first block determines the size bin + // this chunk is now reserved for the `bbin` size class + mi_bbitmap_set_chunk_bin(bbitmap, chunk_idx, bbin); + } + *pidx = (chunk_idx * MI_BCHUNK_BITS) + cidx; + mi_assert_internal(*pidx + n <= mi_bbitmap_max_bits(bbitmap)); + return true; + } + else { + // todo: should _on_find_ return a boolean if there is a chance all are clear to avoid calling `try_clear?` + // we may find that all are cleared only on a second iteration but that is ok as the chunkmap is a conservative approximation. 
+ mi_bbitmap_chunkmap_try_clear(bbitmap, chunk_idx); + } + } + mi_bfield_cycle_iterate_end(Y); + } + } + mi_bfield_cycle_iterate_end(X); + return false; +} + +/* -------------------------------------------------------------------------------- + mi_bbitmap_try_find_and_clear -- used to find free pages + note: the compiler will fully inline the indirect function calls +-------------------------------------------------------------------------------- */ + +bool mi_bbitmap_try_find_and_clear(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx) { + return mi_bbitmap_try_find_and_clear_generic(bbitmap, tseq, 1, pidx, &mi_bchunk_try_find_and_clear_1); +} + +bool mi_bbitmap_try_find_and_clear8(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx) { + return mi_bbitmap_try_find_and_clear_generic(bbitmap, tseq, 8, pidx, &mi_bchunk_try_find_and_clear_8); +} + +// bool mi_bbitmap_try_find_and_clearX(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx) { +// return mi_bbitmap_try_find_and_clear_generic(bbitmap, tseq, MI_BFIELD_BITS, pidx, &mi_bchunk_try_find_and_clear_X); +// } + +bool mi_bbitmap_try_find_and_clearNX(mi_bbitmap_t* bbitmap, size_t tseq, size_t n, size_t* pidx) { + mi_assert_internal(n<=MI_BFIELD_BITS); + return mi_bbitmap_try_find_and_clear_generic(bbitmap, tseq, n, pidx, &mi_bchunk_try_find_and_clearNX); +} + +bool mi_bbitmap_try_find_and_clearNC(mi_bbitmap_t* bbitmap, size_t tseq, size_t n, size_t* pidx) { + mi_assert_internal(n<=MI_BCHUNK_BITS); + return mi_bbitmap_try_find_and_clear_generic(bbitmap, tseq, n, pidx, &mi_bchunk_try_find_and_clearNC); +} + + +/* -------------------------------------------------------------------------------- + mi_bbitmap_try_find_and_clear for huge objects spanning multiple chunks +-------------------------------------------------------------------------------- */ + +// Try to atomically clear `n` bits starting at `chunk_idx` where `n` can span over multiple chunks +static bool mi_bchunk_try_clearN_(mi_bbitmap_t* bbitmap, size_t chunk_idx, size_t n) { + mi_assert_internal((chunk_idx * MI_BCHUNK_BITS) + n <= mi_bbitmap_max_bits(bbitmap)); + + size_t m = n; // bits to go + size_t count = 0; // chunk count + while (m > 0) { + mi_bchunk_t* chunk = &bbitmap->chunks[chunk_idx + count]; + if (!mi_bchunk_try_clearN(chunk, 0, (m > MI_BCHUNK_BITS ? MI_BCHUNK_BITS : m), NULL)) { + goto rollback; + } + m = (m <= MI_BCHUNK_BITS ? 0 : m - MI_BCHUNK_BITS); + count++; + } + return true; + +rollback: + // we only need to reset chunks the we just fully cleared + while (count > 0) { + count--; + mi_bchunk_t* chunk = &bbitmap->chunks[chunk_idx + count]; + mi_bchunk_setN(chunk, 0, MI_BCHUNK_BITS, NULL); + } + return false; +} + +// Go through the bbitmap to find a sequence of `n` bits and clear them atomically where `n > MI_ARENA_MAX_CHUNK_OBJ_SIZE` +// Since these are very large object allocations we always search from the start and only consider starting at the start +// of a chunk (for fragmentation and efficiency). +// Todo: for now we try to find full empty chunks to cover `n` but we can allow a partial chunk at the end +// Todo: This scans directly through the chunks -- we might want to consult the cmap as well? 
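+// [editorial sketch -- not part of the upstream mimalloc 3.2.6 sources] It shows, in miniature,
+// the all-or-nothing claim pattern used by `mi_bchunk_try_clearN_` above: try to atomically
+// acquire a sequence of units and, on the first failure, release the ones already acquired in
+// reverse order. `try_acquire`/`release` are hypothetical callbacks.
+#include <stddef.h>
+#include <stdbool.h>
+static inline bool ex_try_acquire_all(size_t start, size_t count,
+                                      bool (*try_acquire)(size_t unit, void* arg),
+                                      void (*release)(size_t unit, void* arg),
+                                      void* arg)
+{
+  size_t done = 0;
+  while (done < count) {
+    if (!try_acquire(start + done, arg)) {
+      // rollback: release only the units we acquired ourselves
+      while (done > 0) { done--; release(start + done, arg); }
+      return false;
+    }
+    done++;
+  }
+  return true;
+}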
+bool mi_bbitmap_try_find_and_clearN_(mi_bbitmap_t* bbitmap, size_t tseq, size_t n, size_t* pidx) { + MI_UNUSED(tseq); + mi_assert(n > 0); if (n==0) { return false; } + + const size_t chunk_max = mi_bbitmap_chunk_count(bbitmap); + const size_t chunk_req = _mi_divide_up(n, MI_BCHUNK_BITS); // minimal number of chunks needed + if (chunk_max < chunk_req) { return false; } + + // iterate through the chunks + size_t chunk_idx = 0; + while (chunk_idx < chunk_max - chunk_req) + { + size_t count = 0; // chunk count + do { + mi_assert_internal(chunk_idx + count < chunk_max); + mi_bchunk_t* const chunk = &bbitmap->chunks[chunk_idx + count]; + if (!mi_bchunk_all_are_set_relaxed(chunk)) { + break; + } + else { + count++; + } + } + while (count < chunk_req); + + // did we find a suitable range? + if (count == chunk_req) { + // now try to claim it! + if (mi_bchunk_try_clearN_(bbitmap, chunk_idx, n)) { + *pidx = (chunk_idx * MI_BCHUNK_BITS); + for (size_t i = 0; i < count; i++) { + mi_bbitmap_set_chunk_bin(bbitmap, chunk_idx + i, MI_CBIN_HUGE); + } + mi_assert_internal(*pidx + n <= mi_bbitmap_max_bits(bbitmap)); + return true; + } + } + + // keep searching but skip the scanned range + chunk_idx += count+1; + } + return false; +} + + + + + diff --git a/3rd/mimalloc-3.2.6/src/bitmap.h b/3rd/mimalloc-3.2.6/src/bitmap.h new file mode 100644 index 00000000..6e9d2f1d --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/bitmap.h @@ -0,0 +1,339 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2024 Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- +Concurrent bitmap that can set/reset sequences of bits atomically +---------------------------------------------------------------------------- */ +#pragma once +#ifndef MI_BITMAP_H +#define MI_BITMAP_H + +/* -------------------------------------------------------------------------------- + Atomic bitmaps with release/acquire guarantees: + + `mi_bfield_t`: is a single machine word that can efficiently be bit counted (usually `size_t`) + each bit usually represents a single MI_ARENA_SLICE_SIZE in an arena (64 KiB). + We need 16K bits to represent a 1GiB arena. + + `mi_bchunk_t`: a chunk of bfield's of a total of MI_BCHUNK_BITS (= 512 on 64-bit, 256 on 32-bit) + allocations never span across chunks -- so MI_ARENA_MAX_OBJ_SIZE is the number + of bits in a chunk times the MI_ARENA_SLICE_SIZE (512 * 64KiB = 32 MiB). + These chunks are cache-aligned and we can use AVX2/AVX512/NEON/SVE/SVE2/etc. instructions + to scan for bits (perhaps) more efficiently. + + We allocate byte-sized ranges aligned to bytes in the bfield, and bfield-sized + ranges aligned to a bfield. + + Searching linearly through the chunks would be too slow (16K bits per GiB). + Instead we add a "chunkmap" to do a two-level search (more or less a btree of depth 2). + + `mi_bchunkmap_t` (== `mi_bchunk_t`): for each chunk we track if it has (potentially) any bit set. + The chunkmap has 1 bit per chunk that is set if the chunk potentially has a bit set. + This is used to avoid scanning every chunk. 
(and thus strictly an optimization) + It is conservative: it is fine to set a bit in the chunk map even if the chunk turns out + to have no bits set. It is also allowed to briefly have a clear bit even if the + chunk has bits set -- as long as we guarantee that the bit will be set later on; + (this allows us to set the chunkmap bit right after we set a bit in the corresponding chunk). + + However, when we clear a bit in a chunk, and the chunk is indeed all clear, we + cannot safely clear the bit corresponding to the chunk in the chunkmap since it + may race with another thread setting a bit in the same chunk. Therefore, when + clearing, we first test if a chunk is clear, then clear the chunkmap bit, and + then test again to catch any set bits that we may have missed. + + Since the chunkmap may thus be briefly out-of-sync, this means that we may sometimes + not find a free page even though it's there (but we accept this as we avoid taking + full locks). (Another way to do this is to use an epoch but we like to avoid that complexity + for now). + + `mi_bitmap_t`: a bitmap with N chunks. A bitmap has a chunkmap of MI_BCHUNK_BITS (512) + and thus has at most 512 chunks (=2^18 bits x 64 KiB slices = 16 GiB max arena size). + The minimum is 1 chunk which is a 32 MiB arena. + + For now, the implementation assumes MI_HAS_FAST_BITSCAN and uses trailing-zero-count + and pop-count (but we think it can be adapted work reasonably well on older hardware too) +--------------------------------------------------------------------------------------------- */ + +// A word-size bit field. +typedef size_t mi_bfield_t; + +#define MI_BFIELD_BITS_SHIFT (MI_SIZE_SHIFT+3) +#define MI_BFIELD_BITS (1 << MI_BFIELD_BITS_SHIFT) +#define MI_BFIELD_SIZE (MI_BFIELD_BITS/8) +#define MI_BFIELD_LO_BIT8 (((~(mi_bfield_t)0))/0xFF) // 0x01010101 .. +#define MI_BFIELD_HI_BIT8 (MI_BFIELD_LO_BIT8 << 7) // 0x80808080 .. + +#define MI_BCHUNK_SIZE (MI_BCHUNK_BITS / 8) +#define MI_BCHUNK_FIELDS (MI_BCHUNK_BITS / MI_BFIELD_BITS) // 8 on both 64- and 32-bit + + +// some compiler (msvc in C mode) cannot have expressions in the alignment attribute +#if MI_BCHUNK_SIZE==64 +#define mi_decl_bchunk_align mi_decl_align(64) +#elif MI_BCHUNK_SIZE==32 +#define mi_decl_bchunk_align mi_decl_align(32) +#else +#define mi_decl_bchunk_align mi_decl_align(MI_BCHUNK_SIZE) +#endif + + +// A bitmap chunk contains 512 bits on 64-bit (256 on 32-bit) +typedef mi_decl_bchunk_align struct mi_bchunk_s { + _Atomic(mi_bfield_t) bfields[MI_BCHUNK_FIELDS]; +} mi_bchunk_t; + + +// The chunkmap has one bit per corresponding chunk that is set if the chunk potentially has bits set. +// The chunkmap is itself a chunk. 
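+// [editorial sketch -- not part of the upstream mimalloc 3.2.6 sources] It condenses the
+// conservative chunkmap-clearing protocol described in the comment above into standalone C11
+// atomics (not mimalloc's mi_atomic wrappers): clear the summary bit only when the chunk looks
+// empty, then re-check the chunk and restore the summary bit if a concurrent set raced with us.
+// A single word stands in for the whole chunk; all names are hypothetical.
+#include <stdatomic.h>
+#include <stdint.h>
+#include <stdbool.h>
+static inline bool ex_summary_try_clear(_Atomic uint64_t* chunk_word, _Atomic uint64_t* summary, uint64_t summary_bit) {
+  if (atomic_load_explicit(chunk_word, memory_order_relaxed) != 0) return false;  // chunk not empty
+  atomic_fetch_and_explicit(summary, ~summary_bit, memory_order_acq_rel);         // clear the summary bit
+  if (atomic_load_explicit(chunk_word, memory_order_relaxed) != 0) {
+    // a concurrent set slipped in between: conservatively restore the summary bit
+    atomic_fetch_or_explicit(summary, summary_bit, memory_order_acq_rel);
+    return false;
+  }
+  return true;
+}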
+typedef mi_bchunk_t mi_bchunkmap_t; + +#define MI_BCHUNKMAP_BITS MI_BCHUNK_BITS + +#define MI_BITMAP_MAX_CHUNK_COUNT (MI_BCHUNKMAP_BITS) +#define MI_BITMAP_MIN_CHUNK_COUNT (1) +#if MI_SIZE_BITS > 32 +#define MI_BITMAP_DEFAULT_CHUNK_COUNT (64) // 2 GiB on 64-bit -- this is for the page map +#else +#define MI_BITMAP_DEFAULT_CHUNK_COUNT (1) +#endif +#define MI_BITMAP_MAX_BIT_COUNT (MI_BITMAP_MAX_CHUNK_COUNT * MI_BCHUNK_BITS) // 16 GiB arena +#define MI_BITMAP_MIN_BIT_COUNT (MI_BITMAP_MIN_CHUNK_COUNT * MI_BCHUNK_BITS) // 32 MiB arena +#define MI_BITMAP_DEFAULT_BIT_COUNT (MI_BITMAP_DEFAULT_CHUNK_COUNT * MI_BCHUNK_BITS) // 2 GiB arena + + +// An atomic bitmap +typedef mi_decl_bchunk_align struct mi_bitmap_s { + _Atomic(size_t) chunk_count; // total count of chunks (0 < N <= MI_BCHUNKMAP_BITS) + size_t _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 1]; // suppress warning on msvc + mi_bchunkmap_t chunkmap; + mi_bchunk_t chunks[MI_BITMAP_DEFAULT_CHUNK_COUNT]; // usually dynamic MI_BITMAP_MAX_CHUNK_COUNT +} mi_bitmap_t; + + +static inline size_t mi_bitmap_chunk_count(const mi_bitmap_t* bitmap) { + return mi_atomic_load_relaxed(&((mi_bitmap_t*)bitmap)->chunk_count); +} + +static inline size_t mi_bitmap_max_bits(const mi_bitmap_t* bitmap) { + return (mi_bitmap_chunk_count(bitmap) * MI_BCHUNK_BITS); +} + + + +/* -------------------------------------------------------------------------------- + Atomic bitmap operations +-------------------------------------------------------------------------------- */ + +// Many operations are generic over setting or clearing the bit sequence: we use `mi_xset_t` for this (true if setting, false if clearing) +typedef bool mi_xset_t; +#define MI_BIT_SET (true) +#define MI_BIT_CLEAR (false) + + +// Required size of a bitmap to represent `bit_count` bits. +size_t mi_bitmap_size(size_t bit_count, size_t* chunk_count); + +// Initialize a bitmap to all clear; avoid a mem_zero if `already_zero` is true +// returns the size of the bitmap. +size_t mi_bitmap_init(mi_bitmap_t* bitmap, size_t bit_count, bool already_zero); + +// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). +// Not atomic so only use if still local to a thread. +void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n); + + +// Set a bit in the bitmap; returns `true` if it atomically transitioned from 0 to 1 +bool mi_bitmap_set(mi_bitmap_t* bitmap, size_t idx); + +// Clear a bit in the bitmap; returns `true` if it atomically transitioned from 1 to 0 +bool mi_bitmap_clear(mi_bitmap_t* bitmap, size_t idx); + +// Set a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 0's to 1's +// If `already_set` is not NULL, it is set to count of bits were already all set. +// (this is used for correct statistics if commiting over a partially committed area) +bool mi_bitmap_setN(mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_set); + +// Clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 1's to 0's +bool mi_bitmap_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n); + + +// Is a sequence of n bits already all set/cleared? +bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n); + +// Is the bitmap completely clear? +bool mi_bitmap_is_all_clear(mi_bitmap_t* bitmap); + +// Is a sequence of n bits already set? 
+// (Used to check if a memory range is already committed)
+static inline bool mi_bitmap_is_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
+  return mi_bitmap_is_xsetN(MI_BIT_SET, bitmap, idx, n);
+}
+
+// Is a sequence of n bits already clear?
+static inline bool mi_bitmap_is_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
+  return mi_bitmap_is_xsetN(MI_BIT_CLEAR, bitmap, idx, n);
+}
+
+static inline bool mi_bitmap_is_set(mi_bitmap_t* bitmap, size_t idx) {
+  return mi_bitmap_is_setN(bitmap, idx, 1);
+}
+
+static inline bool mi_bitmap_is_clear(mi_bitmap_t* bitmap, size_t idx) {
+  return mi_bitmap_is_clearN(bitmap, idx, 1);
+}
+
+// Called once a bit is cleared to see if the memory slice can be claimed.
+typedef bool (mi_claim_fun_t)(size_t slice_index, mi_arena_t* arena, bool* keep_set);
+
+// Find a set bit in the bitmap, atomically clear it, and check if `claim` returns true.
+// If not claimed, continue on (potentially setting the bit again depending on `keep_set`).
+// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
+mi_decl_nodiscard bool mi_bitmap_try_find_and_claim(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx,
+                                                    mi_claim_fun_t* claim, mi_arena_t* arena );
+
+
+// Atomically clear a bit but only if it is set. Will block otherwise until the bit is set.
+// This is used to delay free-ing a page that is at the same time being considered to be
+// allocated from `mi_arena_try_abandoned` (and is in the `claim` function of `mi_bitmap_try_find_and_claim`).
+void mi_bitmap_clear_once_set(mi_bitmap_t* bitmap, size_t idx);
+
+
+// If a bit is set in the bitmap, return `true` and set `idx` to the index of the highest bit.
+// Otherwise return `false` (and `*idx` is undefined).
+// Used for unloading arenas.
+bool mi_bitmap_bsr(mi_bitmap_t* bitmap, size_t* idx);
+
+// Return count of all set bits in a bitmap.
+size_t mi_bitmap_popcount(mi_bitmap_t* bitmap);
+
+
+typedef bool (mi_forall_set_fun_t)(size_t slice_index, size_t slice_count, mi_arena_t* arena, void* arg2);
+
+// Visit all set bits in a bitmap (`slice_count == 1`)
+bool _mi_bitmap_forall_set(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg);
+
+// Visit all set bits in a bitmap with larger ranges if possible (`slice_count >= 1`)
+// Ranges will never cross chunk boundaries though (and `slice_count <= MI_BCHUNK_BITS`)
+bool _mi_bitmap_forall_setc_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg);
+
+// Visit all set bits in a bitmap with at least `rngslices` at a time (and aligned to `rngslices`).
+// This is used by purging to not break up transparent huge pages for example.
+// Ranges will never cross chunk boundaries (and `slice_count <= MI_BCHUNK_BITS`).
+bool _mi_bitmap_forall_setc_rangesn(mi_bitmap_t* bitmap, size_t rngslices, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg);
+
+// Count all set bits in a given range in the bitmap.
+size_t mi_bitmap_popcountN( mi_bitmap_t* bitmap, size_t idx, size_t n);
+
+/* ----------------------------------------------------------------------------
+  Binned concurrent bitmap
+  Assigns a size class to each chunk such that small blocks don't cause too
+  much fragmentation since we keep chunks for larger blocks separate.
+---------------------------------------------------------------------------- */ + +// mi_chunkbin_t is defined in mimalloc-stats.h + +static inline mi_chunkbin_t mi_chunkbin_inc(mi_chunkbin_t bbin) { + mi_assert_internal(bbin < MI_CBIN_COUNT); + return (mi_chunkbin_t)((int)bbin + 1); +} + +static inline mi_chunkbin_t mi_chunkbin_dec(mi_chunkbin_t bbin) { + mi_assert_internal(bbin > MI_CBIN_NONE); + return (mi_chunkbin_t)((int)bbin - 1); +} + +static inline mi_chunkbin_t mi_chunkbin_of(size_t slice_count) { + if (slice_count==1) return MI_CBIN_SMALL; + if (slice_count==8) return MI_CBIN_MEDIUM; + #if MI_ENABLE_LARGE_PAGES + if (slice_count==MI_BFIELD_BITS) return MI_CBIN_LARGE; + #endif + if (slice_count > MI_BCHUNK_BITS) return MI_CBIN_HUGE; + return MI_CBIN_OTHER; +} + +// An atomic "binned" bitmap for the free slices where we keep chunks reserved for particular size classes +typedef mi_decl_bchunk_align struct mi_bbitmap_s { + _Atomic(size_t) chunk_count; // total count of chunks (0 < N <= MI_BCHUNKMAP_BITS) + _Atomic(size_t) chunk_max_accessed; // max chunk index that was once cleared or set + #if (MI_BCHUNK_SIZE / MI_SIZE_SIZE) > 2 + size_t _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 2]; // suppress warning on msvc by aligning manually + #endif + mi_bchunkmap_t chunkmap; + mi_bchunkmap_t chunkmap_bins[MI_CBIN_COUNT - 1]; // chunkmaps with bit set if the chunk is in that size class (excluding MI_CBIN_NONE) + mi_bchunk_t chunks[MI_BITMAP_DEFAULT_CHUNK_COUNT]; // usually dynamic MI_BITMAP_MAX_CHUNK_COUNT +} mi_bbitmap_t; + + +static inline size_t mi_bbitmap_chunk_count(const mi_bbitmap_t* bbitmap) { + return mi_atomic_load_relaxed(&((mi_bbitmap_t*)bbitmap)->chunk_count); +} + +static inline size_t mi_bbitmap_max_bits(const mi_bbitmap_t* bbitmap) { + return (mi_bbitmap_chunk_count(bbitmap) * MI_BCHUNK_BITS); +} + +mi_chunkbin_t mi_bbitmap_debug_get_bin(const mi_bchunk_t* chunkmap_bins, size_t chunk_idx); + +size_t mi_bbitmap_size(size_t bit_count, size_t* chunk_count); + + +// Initialize a bitmap to all clear; avoid a mem_zero if `already_zero` is true +// returns the size of the bitmap. +size_t mi_bbitmap_init(mi_bbitmap_t* bbitmap, size_t bit_count, bool already_zero); + +// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). +// Not atomic so only use if still local to a thread. +void mi_bbitmap_unsafe_setN(mi_bbitmap_t* bbitmap, size_t idx, size_t n); + + +// Set a sequence of `n` bits in the bbitmap; returns `true` if atomically transitioned from all 0's to 1's +bool mi_bbitmap_setN(mi_bbitmap_t* bbitmap, size_t idx, size_t n); + + +// Is a sequence of n bits already all set/cleared? +bool mi_bbitmap_is_xsetN(mi_xset_t set, mi_bbitmap_t* bbitmap, size_t idx, size_t n); + +// Is a sequence of n bits already set? +// (Used to check if a memory range is already committed) +static inline bool mi_bbitmap_is_setN(mi_bbitmap_t* bbitmap, size_t idx, size_t n) { + return mi_bbitmap_is_xsetN(MI_BIT_SET, bbitmap, idx, n); +} + +// Is a sequence of n bits already clear? +static inline bool mi_bbitmap_is_clearN(mi_bbitmap_t* bbitmap, size_t idx, size_t n) { + return mi_bbitmap_is_xsetN(MI_BIT_CLEAR, bbitmap, idx, n); +} + + +// Try to atomically transition `n` bits from all set to all clear. Returns `true` on succes. +// `n` cannot cross chunk boundaries, where `n <= MI_CHUNK_BITS`. 
+bool mi_bbitmap_try_clearNC(mi_bbitmap_t* bbitmap, size_t idx, size_t n); + + +// Specialized versions for common bit sequence sizes +bool mi_bbitmap_try_find_and_clear(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx); // 1-bit +bool mi_bbitmap_try_find_and_clear8(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx); // 8-bits +// bool mi_bbitmap_try_find_and_clearX(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx); // MI_BFIELD_BITS +bool mi_bbitmap_try_find_and_clearNX(mi_bbitmap_t* bbitmap, size_t n, size_t tseq, size_t* pidx); // < MI_BFIELD_BITS +bool mi_bbitmap_try_find_and_clearNC(mi_bbitmap_t* bbitmap, size_t n, size_t tseq, size_t* pidx); // > MI_BFIELD_BITS <= MI_BCHUNK_BITS +bool mi_bbitmap_try_find_and_clearN_(mi_bbitmap_t* bbitmap, size_t n, size_t tseq, size_t* pidx); // > MI_BCHUNK_BITS + +// Find a sequence of `n` bits in the bbitmap with all bits set, and try to atomically clear all. +// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`. +mi_decl_nodiscard static inline bool mi_bbitmap_try_find_and_clearN(mi_bbitmap_t* bbitmap, size_t n, size_t tseq, size_t* pidx) { + if (n==1) return mi_bbitmap_try_find_and_clear(bbitmap, tseq, pidx); // small pages + if (n==8) return mi_bbitmap_try_find_and_clear8(bbitmap, tseq, pidx); // medium pages + // if (n==MI_BFIELD_BITS) return mi_bbitmap_try_find_and_clearX(bbitmap, tseq, pidx); // large pages + if (n==0) return false; + if (n<=MI_BFIELD_BITS) return mi_bbitmap_try_find_and_clearNX(bbitmap, tseq, n, pidx); + if (n<=MI_BCHUNK_BITS) return mi_bbitmap_try_find_and_clearNC(bbitmap, tseq, n, pidx); + return mi_bbitmap_try_find_and_clearN_(bbitmap, tseq, n, pidx); +} + + +#endif // MI_BITMAP_H diff --git a/3rd/mimalloc-3.2.6/src/free.c b/3rd/mimalloc-3.2.6/src/free.c new file mode 100644 index 00000000..ab15e269 --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/free.c @@ -0,0 +1,617 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. 
+-----------------------------------------------------------------------------*/ +#if !defined(MI_IN_ALLOC_C) +#error "this file should be included from 'alloc.c' (so aliases can work from alloc-override)" +// add includes help an IDE +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" // _mi_prim_thread_id() +#endif + +// forward declarations +static void mi_check_padding(const mi_page_t* page, const mi_block_t* block); +static bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block); +static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block); +static void mi_stat_free(const mi_page_t* page, const mi_block_t* block); + + +// ------------------------------------------------------ +// Free +// ------------------------------------------------------ + +// regular free of a (thread local) block pointer +// fast path written carefully to prevent spilling on the stack +static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool track_stats, bool check_full) +{ + // checks + if mi_unlikely(mi_check_is_double_free(page, block)) return; + mi_check_padding(page, block); + if (track_stats) { mi_stat_free(page, block); } + #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN && !MI_GUARDED + memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); + #endif + if (track_stats) { mi_track_free_size(block, mi_page_usable_size_of(page, block)); } // faster then mi_usable_size as we already know the page and that p is unaligned + + // actual free: push on the local free list + mi_block_set_next(page, block, page->local_free); + page->local_free = block; + if mi_unlikely(--page->used == 0) { + _mi_page_retire(page); + } + else if mi_unlikely(check_full && mi_page_is_in_full(page)) { + _mi_page_unfull(page); + } +} + +// Forward declaration for multi-threaded collect +static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page, mi_block_t* mt_free) mi_attr_noexcept; + +// Free a block multi-threaded +static inline void mi_free_block_mt(mi_page_t* page, mi_block_t* block) mi_attr_noexcept +{ + // adjust stats (after padding check and potentially recursive `mi_free` above) + mi_stat_free(page, block); // stat_free may access the padding + mi_track_free_size(block, mi_page_usable_size_of(page, block)); + + // _mi_padding_shrink(page, block, sizeof(mi_block_t)); +#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading + size_t dbgsize = mi_usable_size(block); + if (dbgsize > MI_MiB) { dbgsize = MI_MiB; } + _mi_memset_aligned(block, MI_DEBUG_FREED, dbgsize); +#endif + + // push atomically on the page thread free list + mi_thread_free_t tf_new; + mi_thread_free_t tf_old = mi_atomic_load_relaxed(&page->xthread_free); + do { + mi_block_set_next(page, block, mi_tf_block(tf_old)); + tf_new = mi_tf_create(block, true /* always use owned: try to claim it if the page is abandoned */); + } while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tf_old, tf_new)); // todo: release is enough? + + // and atomically try to collect the page if it was abandoned + const bool is_owned_now = !mi_tf_is_owned(tf_old); + if (is_owned_now) { + mi_assert_internal(mi_page_is_abandoned(page)); + mi_free_try_collect_mt(page,block); + } +} + + +// Adjust a block that was allocated aligned, to the actual start of the block in the page. 
+// note: this can be called from `mi_free_generic_mt` where a non-owning thread accesses the +// `page_start` and `block_size` fields; however these are constant and the page won't be +// deallocated (as the block we are freeing keeps it alive) and thus safe to read concurrently. +mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p) { + mi_assert_internal(page!=NULL && p!=NULL); + + const size_t diff = (uint8_t*)p - mi_page_start(page); + const size_t block_size = mi_page_block_size(page); + const size_t adjust = (_mi_is_power_of_two(block_size) ? diff & (block_size - 1) : diff % block_size); + return (mi_block_t*)((uintptr_t)p - adjust); +} + +// forward declaration for a MI_GUARDED build +#if MI_GUARDED +static void mi_block_unguard(mi_page_t* page, mi_block_t* block, void* p); // forward declaration +static inline void mi_block_check_unguard(mi_page_t* page, mi_block_t* block, void* p) { + if (mi_block_ptr_is_guarded(block, p)) { mi_block_unguard(page, block, p); } +} +#else +static inline void mi_block_check_unguard(mi_page_t* page, mi_block_t* block, void* p) { + MI_UNUSED(page); MI_UNUSED(block); MI_UNUSED(p); +} +#endif + +static inline mi_block_t* mi_validate_block_from_ptr( const mi_page_t* page, void* p ) { + mi_assert(_mi_page_ptr_unalign(page,p) == (mi_block_t*)p); // should never be an interior pointer + #if MI_SECURE > 0 + // in secure mode we always unalign to guard against free-ing interior pointers + return _mi_page_ptr_unalign(page,p); + #else + MI_UNUSED(page); + return (mi_block_t*)p; + #endif +} + + +// free a local pointer (page parameter comes first for better codegen) +static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, void* p) mi_attr_noexcept { + mi_assert_internal(p!=NULL && page != NULL); + mi_block_t* const block = (mi_page_has_interior_pointers(page) ? _mi_page_ptr_unalign(page, p) : mi_validate_block_from_ptr(page,p)); + mi_block_check_unguard(page, block, p); + mi_free_block_local(page, block, true /* track stats */, true /* check for a full page */); +} + +// free a pointer owned by another thread (page parameter comes first for better codegen) +static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, void* p) mi_attr_noexcept { + mi_assert_internal(p!=NULL && page != NULL); + mi_block_t* const block = (mi_page_has_interior_pointers(page) ? _mi_page_ptr_unalign(page, p) : mi_validate_block_from_ptr(page,p)); + mi_block_check_unguard(page, block, p); + mi_free_block_mt(page, block); +} + +// generic free (for runtime integration) +void mi_decl_noinline _mi_free_generic(mi_page_t* page, bool is_local, void* p) mi_attr_noexcept { + if (is_local) mi_free_generic_local(page,p); + else mi_free_generic_mt(page,p); +} + + +// Get the page belonging to a pointer +// Does further checks in debug mode to see if this was a valid pointer. 
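+// [editorial sketch -- not part of the upstream mimalloc 3.2.6 sources] It restates the
+// arithmetic of `_mi_page_ptr_unalign` above on plain values: the distance of an interior
+// pointer from the page start is reduced modulo the block size (a mask when the block size is a
+// power of two) to find the start of the enclosing block. Names are hypothetical.
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+static inline uintptr_t ex_block_start(uintptr_t page_start, size_t block_size, uintptr_t p) {
+  const size_t diff    = (size_t)(p - page_start);
+  const bool   is_pow2 = ((block_size & (block_size - 1)) == 0);
+  const size_t adjust  = (is_pow2 ? (diff & (block_size - 1)) : (diff % block_size));
+  return p - adjust;   // start of the block containing `p`
+}
+// e.g. page_start=0x1000, block_size=48, p=0x1070: diff=112, adjust=112%48=16, block start=0x1060.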
+static inline mi_page_t* mi_validate_ptr_page(const void* p, const char* msg) +{ + MI_UNUSED_RELEASE(msg); + #if MI_DEBUG + if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0 && !mi_option_is_enabled(mi_option_guarded_precise)) { + _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p); + return NULL; + } + mi_page_t* page = _mi_safe_ptr_page(p); + if (p != NULL && page == NULL) { + _mi_error_message(EINVAL, "%s: invalid pointer: %p\n", msg, p); + } + return page; + #else + return _mi_ptr_page(p); + #endif +} + +// Free a block +// Fast path written carefully to prevent register spilling on the stack +static mi_decl_forceinline void mi_free_ex(void* p, size_t* usable) +{ + mi_page_t* const page = mi_validate_ptr_page(p,"mi_free"); + if mi_unlikely(page==NULL) return; // page will be NULL if p==NULL + mi_assert_internal(p!=NULL && page!=NULL); + if (usable!=NULL) { *usable = mi_page_usable_block_size(page); } + + const mi_threadid_t xtid = (_mi_prim_thread_id() ^ mi_page_xthread_id(page)); + if mi_likely(xtid == 0) { // `tid == mi_page_thread_id(page) && mi_page_flags(page) == 0` + // thread-local, aligned, and not a full page + mi_block_t* const block = mi_validate_block_from_ptr(page,p); + mi_free_block_local(page, block, true /* track stats */, false /* no need to check if the page is full */); + } + else if (xtid <= MI_PAGE_FLAG_MASK) { // `tid == mi_page_thread_id(page) && mi_page_flags(page) != 0` + // page is local, but is full or contains (inner) aligned blocks; use generic path + mi_free_generic_local(page, p); + } + // free-ing in a page owned by a theap in another thread, or an abandoned page (not belonging to a theap) + else if ((xtid & MI_PAGE_FLAG_MASK) == 0) { // `tid != mi_page_thread_id(page) && mi_page_flags(page) == 0` + // blocks are aligned (and not a full page); push on the thread_free list + mi_block_t* const block = mi_validate_block_from_ptr(page,p); + mi_free_block_mt(page,block); + } + else { + // page is full or contains (inner) aligned blocks; use generic multi-thread path + mi_free_generic_mt(page, p); + } +} + +void mi_free(void* p) mi_attr_noexcept { + mi_free_ex(p, NULL); +} + +void mi_ufree(void* p, size_t* usable) mi_attr_noexcept { + mi_free_ex(p, usable); +} + +// -------------------------------------------------------------------------------------------- +// `mi_free_try_collect_mt`: Potentially collect a page in a free in an abandoned page. +// 1. if the page becomes empty, free it +// 2. if it can be reclaimed, reclaim it in our theap +// 3. 
if it went to < 7/8th used, re-abandon to be mapped (so it can be found by theaps looking for free pages)
+// --------------------------------------------------------------------------------------------
+
+// Helper for mi_free_try_collect_mt: free if the page has no more used blocks (this is updated by `_mi_page_free_collect(_partly)`)
+static bool mi_abandoned_page_try_free(mi_page_t* page)
+{
+  if (!mi_page_all_free(page)) return false;
+  // first remove it from the abandoned pages in the arena (if mapped, this might wait for any readers to finish)
+  _mi_arenas_page_unabandon(page,NULL);
+  _mi_arenas_page_free(page,NULL);  // we can now free the page directly
+  return true;
+}
+
+// Helper for mi_free_try_collect_mt: try to reabandon a previously abandoned, mostly full page to be mapped
+static bool mi_abandoned_page_try_reabandon_to_mapped(mi_page_t* page)
+{
+  // if the page is unmapped, try to reabandon so it can possibly be mapped and found for allocations
+  // We only reabandon if a full page starts to have enough blocks available to prevent immediate re-abandon of a full page
+  if (mi_page_is_mostly_used(page)) return false;  // not too full
+  if (page->memid.memkind != MI_MEM_ARENA || mi_page_is_abandoned_mapped(page)) return false;  // and not already mapped (or unmappable)
+
+  mi_assert(!mi_page_is_full(page));
+  return _mi_arenas_page_try_reabandon_to_mapped(page);
+}
+
+// Release ownership of a page. This may free or reabandon the page if other blocks are concurrently
+// freed in the meantime (so the page may already be gone by the time this returns).
+// By passing the captured `expected_thread_free`, we can often avoid calling `mi_page_free_collect`.
+static void mi_abandoned_page_unown_from_free(mi_page_t* page, mi_block_t* expected_thread_free) {
+  mi_assert_internal(mi_page_is_owned(page));
+  mi_assert_internal(mi_page_is_abandoned(page));
+  mi_assert_internal(!mi_page_all_free(page));
+  // try to cas atomically the original free list (`mt_free`) back with the ownership cleared.
+  mi_thread_free_t tf_expect = mi_tf_create(expected_thread_free, true);
+  mi_thread_free_t tf_new = mi_tf_create(expected_thread_free, false);
+  while mi_unlikely(!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tf_expect, tf_new)) {
+    mi_assert_internal(mi_tf_is_owned(tf_expect));
+    // while the xthread_free list is not empty..
+    while (mi_tf_block(tf_expect) != NULL) {
+      // if there were concurrent updates to the thread-free list, we retry to free or reabandon to mapped (if it became !mostly_used).
+ _mi_page_free_collect(page,false); // update used count + if (mi_abandoned_page_try_free(page)) return; + if (mi_abandoned_page_try_reabandon_to_mapped(page)) return; + // otherwise continue un-owning + tf_expect = mi_atomic_load_relaxed(&page->xthread_free); + } + // and try again to release ownership + mi_assert_internal(mi_tf_block(tf_expect)==NULL); + tf_new = mi_tf_create(NULL, false); + } +} + +static inline bool mi_page_queue_len_is_atmost( mi_theap_t* theap, size_t block_size, long atmost) { + if (atmost < 0) return false; + mi_page_queue_t* const pq = mi_page_queue(theap,block_size); + mi_assert_internal(pq!=NULL); + return (pq->count <= (size_t)atmost); +} + +// Helper for mi_free_try_collect_mt: try to reclaim the page for ourselves +static mi_decl_noinline bool mi_abandoned_page_try_reclaim(mi_page_t* page, long reclaim_on_free) mi_attr_noexcept +{ + // note: reclaiming can improve benchmarks like `larson` or `rbtree-ck` a lot even in the single-threaded case, + // since free-ing from an owned page avoids atomic operations. However, if we reclaim too eagerly in + // a multi-threaded scenario we may start to hold on to too much memory and reduce reuse among threads. + // If the current theap is where the page originally came from, we reclaim much more eagerly while + // 'cross-thread' reclaiming on free is by default off (and we only 'reclaim' these by finding the abandoned + // pages when we allocate a fresh page). + mi_assert_internal(mi_page_is_owned(page)); + mi_assert_internal(mi_page_is_abandoned(page)); + mi_assert_internal(!mi_page_all_free(page)); + mi_assert_internal(page->block_size <= MI_SMALL_SIZE_MAX); + mi_assert_internal(reclaim_on_free >= 0); + + // get our theap + // note: don't use `mi_heap_theap()` as we may just have terminated this thread and we should + // not reinitialize the theap for this thread. (can happen due to thread-local destructors for example -- issue #944) + mi_theap_t* const theap = _mi_page_associated_theap_peek(page); + if (theap==NULL || !theap->allow_page_reclaim) return false; + + // todo: cache `is_in_threadpool` and `exclusive_arena` directly in the theap for performance? + // set max_reclaim limit + long max_reclaim = 0; + if mi_likely(theap == page->theap) { // did this page originate from the current theap? (and thus allocated from this thread) + // originating theap + max_reclaim = _mi_option_get_fast(theap->tld->is_in_threadpool ? mi_option_page_cross_thread_max_reclaim : mi_option_page_max_reclaim); + } + else if (reclaim_on_free == 1 && // if cross-thread is allowed + !theap->tld->is_in_threadpool && // and we are not part of a threadpool + !mi_page_is_mostly_used(page) && // and the page is not too full + _mi_arena_memid_is_suitable(page->memid, theap->heap->exclusive_arena)) { // and it fits our memory + // across threads + max_reclaim = _mi_option_get_fast(mi_option_page_cross_thread_max_reclaim); + } + + // are we within the reclaim limit? + if (max_reclaim >= 0 && !mi_page_queue_len_is_atmost(theap, page->block_size, max_reclaim)) { + return false; + } + + // reclaim the page into this theap + // first remove it from the abandoned pages in the arena -- this might wait for any readers to finish + _mi_arenas_page_unabandon(page, theap); + _mi_theap_page_reclaim(theap, page); + mi_theap_stat_counter_increase(theap, pages_reclaim_on_free, 1); + return true; +} + + +// We freed a block in an abandoned page (that was not owned). 
Try to collect +static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page, mi_block_t* mt_free) mi_attr_noexcept +{ + mi_assert_internal(mi_page_is_owned(page)); + mi_assert_internal(mi_page_is_abandoned(page)); + mi_assert_internal(mt_free != NULL); + + // we own the page now, and it is safe to collect the thread atomic free list + if (page->block_size <= MI_SMALL_SIZE_MAX) { + // use the `_partly` version to avoid atomic operations since we already have the `mt_free` pointing into the thread free list + // (after this the `used` count might be too high (as some blocks may have been concurrently added to the thread free list and are yet uncounted). + // however, if the page became completely free, the used count is guaranteed to be 0.) + mi_assert_internal(page->reserved>=16); // below this even one freed block goes from full to no longer mostly used. + _mi_page_free_collect_partly(page, mt_free); + } + else { + // for larger blocks we use the regular collect + _mi_page_free_collect(page,false /* no force */); + mt_free = NULL; // expected page->xthread_free value after collection + } + const long reclaim_on_free = _mi_option_get_fast(mi_option_page_reclaim_on_free); + #if MI_DEBUG > 1 + if (mi_page_is_singleton(page)) { mi_assert_internal(mi_page_all_free(page)); } + if (mi_page_is_full(page)) { mi_assert(mi_page_is_mostly_used(page)); } + #endif + + // try to: 1. free it, 2. reclaim it, or 3. reabandon it to be mapped + if (mi_abandoned_page_try_free(page)) return; + if (page->block_size <= MI_SMALL_SIZE_MAX && reclaim_on_free >= 0) { // early test for better codegen + if (mi_abandoned_page_try_reclaim(page, reclaim_on_free)) return; + } + if (mi_abandoned_page_try_reabandon_to_mapped(page)) return; + + // otherwise unown the page again + mi_abandoned_page_unown_from_free(page, mt_free); +} + + +// ------------------------------------------------------ +// Usable size +// ------------------------------------------------------ + +// Bytes available in a block +static size_t mi_decl_noinline mi_page_usable_aligned_size_of(const mi_page_t* page, const void* p) mi_attr_noexcept { + const mi_block_t* block = _mi_page_ptr_unalign(page, p); + const size_t size = mi_page_usable_size_of(page, block); + const ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)block; + mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); + const size_t aligned_size = (size - adjust); + #if MI_GUARDED + if (mi_block_ptr_is_guarded(block, p)) { + return aligned_size - _mi_os_page_size(); + } + #endif + return aligned_size; +} + +static inline size_t _mi_usable_size(const void* p, const mi_page_t* page) mi_attr_noexcept { + if mi_unlikely(page==NULL) return 0; + if mi_likely(!mi_page_has_interior_pointers(page)) { + const mi_block_t* block = (const mi_block_t*)p; + return mi_page_usable_size_of(page, block); + } + else { + // split out to separate routine for improved code generation + return mi_page_usable_aligned_size_of(page, p); + } +} + +mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept { + const mi_page_t* const page = mi_validate_ptr_page(p,"mi_usable_size"); + return _mi_usable_size(p,page); +} + + +// ------------------------------------------------------ +// Free variants +// ------------------------------------------------------ + +void mi_free_size(void* p, size_t size) mi_attr_noexcept { + MI_UNUSED_RELEASE(size); + #if MI_DEBUG + const mi_page_t* const page = mi_validate_ptr_page(p,"mi_free_size"); + const size_t available = _mi_usable_size(p,page); + mi_assert(p == NULL || 
size <= available || available == 0 /* invalid pointer */ ); + #endif + mi_free(p); +} + +void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept { + MI_UNUSED_RELEASE(alignment); + mi_assert(((uintptr_t)p % alignment) == 0); + mi_free_size(p,size); +} + +void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept { + MI_UNUSED_RELEASE(alignment); + mi_assert(((uintptr_t)p % alignment) == 0); + mi_free(p); +} + + +// ------------------------------------------------------ +// Check for double free in secure and debug mode +// This is somewhat expensive so only enabled for secure mode 4 +// ------------------------------------------------------ + +#if (MI_ENCODE_FREELIST && (MI_SECURE>=4 || MI_DEBUG!=0)) +// linear check if the free list contains a specific element +static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, const mi_block_t* elem) { + while (list != NULL) { + if (elem==list) return true; + list = mi_block_next(page, list); + } + return false; +} + +static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { + // The decoded value is in the same page (or NULL). + // Walk the free lists to verify positively if it is already freed + if (mi_list_contains(page, page->free, block) || + mi_list_contains(page, page->local_free, block) || + mi_list_contains(page, mi_page_thread_free(page), block)) + { + _mi_error_message(EAGAIN, "double free detected of block %p with size %zu\n", block, mi_page_block_size(page)); + return true; + } + return false; +} + +#define mi_track_page(page,access) { size_t psize; void* pstart = _mi_page_start(_mi_page_segment(page),page,&psize); mi_track_mem_##access( pstart, psize); } + +static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { + bool is_double_free = false; + mi_block_t* n = mi_block_nextx(page, block, page->keys); // pretend it is freed, and get the decoded first field + if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? + (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? + { + // Suspicious: decoded value a in block is in the same page (or NULL) -- maybe a double free? + // (continue in separate function to improve code generation) + is_double_free = mi_check_is_double_freex(page, block); + } + return is_double_free; +} +#else +static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(page); + MI_UNUSED(block); + return false; +} +#endif + + +// --------------------------------------------------------------------------- +// Check for theap block overflow by setting up padding at the end of the block +// --------------------------------------------------------------------------- + +#if MI_PADDING // && !MI_TRACK_ENABLED +static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) { + *bsize = mi_page_usable_block_size(page); + const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize); + mi_track_mem_defined(padding,sizeof(mi_padding_t)); + *delta = padding->delta; + uint32_t canary = padding->canary; + uintptr_t keys[2]; + keys[0] = page->keys[0]; + keys[1] = page->keys[1]; + bool ok = (mi_ptr_encode_canary(page,block,keys) == canary && *delta <= *bsize); + mi_track_mem_noaccess(padding,sizeof(mi_padding_t)); + return ok; +} + +// Return the exact usable size of a block. 
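+// [editorial sketch -- not part of the upstream mimalloc 3.2.6 sources] It shows, on a plain
+// struct, the padding layout that `mi_page_decode_padding` above checks: the block ends with a
+// (canary, delta) trailer and the exact requested size is the block size minus `delta`. The
+// trailer layout and the canary value here are stand-ins for mimalloc's keyed encoding.
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+typedef struct ex_padding_s { uint32_t canary; uint32_t delta; } ex_padding_t;
+static inline bool ex_check_padding(const uint8_t* block, size_t bsize, uint32_t expected_canary, size_t* usable) {
+  const ex_padding_t* pad = (const ex_padding_t*)(block + bsize);   // trailer sits right after the usable bytes
+  const bool ok = (pad->canary == expected_canary && pad->delta <= bsize);
+  *usable = (ok ? bsize - pad->delta : 0);   // exact requested size when the trailer is intact
+  return ok;
+}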
+static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { + size_t bsize; + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); + mi_assert_internal(ok); mi_assert_internal(delta <= bsize); + return (ok ? bsize - delta : 0); +} + +// When a non-thread-local block is freed, it becomes part of the thread delayed free +// list that is freed later by the owning theap. If the exact usable size is too small to +// contain the pointer for the delayed list, then shrink the padding (by decreasing delta) +// so it will later not trigger an overflow error in `mi_free_block`. +void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { + size_t bsize; + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); + mi_assert_internal(ok); + if (!ok || (bsize - delta) >= min_size) return; // usually already enough space + mi_assert_internal(bsize >= min_size); + if (bsize < min_size) return; // should never happen + size_t new_delta = (bsize - min_size); + mi_assert_internal(new_delta < bsize); + mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize); + mi_track_mem_defined(padding,sizeof(mi_padding_t)); + padding->delta = (uint32_t)new_delta; + mi_track_mem_noaccess(padding,sizeof(mi_padding_t)); +} +#else +static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(block); + return mi_page_usable_block_size(page); +} + +void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { + MI_UNUSED(page); + MI_UNUSED(block); + MI_UNUSED(min_size); +} +#endif + +#if MI_PADDING && MI_PADDING_CHECK + +static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) { + size_t bsize; + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); + *size = *wrong = bsize; + if (!ok) return false; + mi_assert_internal(bsize >= delta); + *size = bsize - delta; + if (!mi_page_is_huge(page)) { + uint8_t* fill = (uint8_t*)block + bsize - delta; + const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? 
MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes + mi_track_mem_defined(fill, maxpad); + for (size_t i = 0; i < maxpad; i++) { + if (fill[i] != MI_DEBUG_PADDING) { + *wrong = bsize - delta + i; + ok = false; + break; + } + } + mi_track_mem_noaccess(fill, maxpad); + } + return ok; +} + +static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { + size_t size; + size_t wrong; + if (!mi_verify_padding(page,block,&size,&wrong)) { + _mi_error_message(EFAULT, "buffer overflow in theap block %p of size %zu: write after %zu bytes\n", block, size, wrong ); + } +} + +#else + +static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(page); + MI_UNUSED(block); +} + +#endif + +// only maintain stats for smaller objects if requested +#if (MI_STAT>0) +static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(block); + mi_theap_t* const theap = mi_theap_get_default(); + const size_t bsize = mi_page_usable_block_size(page); + // #if (MI_STAT>1) + // const size_t usize = mi_page_usable_size_of(page, block); + // mi_theap_stat_decrease(theap, malloc_requested, usize); + // #endif + if (bsize <= MI_LARGE_MAX_OBJ_SIZE) { + mi_theap_stat_decrease(theap, malloc_normal, bsize); + #if (MI_STAT > 1) + mi_theap_stat_decrease(theap, malloc_bins[_mi_bin(bsize)], 1); + #endif + } + else { + const size_t bpsize = mi_page_block_size(page); // match stat in page.c:mi_huge_page_alloc + mi_theap_stat_decrease(theap, malloc_huge, bpsize); + } +} +#else +void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(page); MI_UNUSED(block); +} +#endif + + +// Remove guard page when building with MI_GUARDED +#if MI_GUARDED +static void mi_block_unguard(mi_page_t* page, mi_block_t* block, void* p) { + MI_UNUSED(p); + mi_assert_internal(mi_block_ptr_is_guarded(block, p)); + mi_assert_internal(mi_page_has_interior_pointers(page)); + mi_assert_internal((uint8_t*)p - (uint8_t*)block >= (ptrdiff_t)sizeof(mi_block_t)); + mi_assert_internal(block->next == MI_BLOCK_TAG_GUARDED); + + const size_t bsize = mi_page_block_size(page); + const size_t psize = _mi_os_page_size(); + mi_assert_internal(bsize > psize); + mi_assert_internal(!page->memid.is_pinned); + void* gpage = (uint8_t*)block + bsize - psize; + mi_assert_internal(_mi_is_aligned(gpage, psize)); + _mi_os_unprotect(gpage, psize); +} +#endif diff --git a/3rd/mimalloc-3.2.6/src/heap.c b/3rd/mimalloc-3.2.6/src/heap.c new file mode 100644 index 00000000..7f566697 --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/heap.c @@ -0,0 +1,214 @@ +/*---------------------------------------------------------------------------- +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" // _mi_theap_default + + +/* ----------------------------------------------------------- + Heap's +----------------------------------------------------------- */ + +mi_theap_t* mi_heap_theap(mi_heap_t* heap) { + return _mi_heap_theap(heap); +} + +void mi_heap_set_numa_affinity(mi_heap_t* heap, int numa_node) { + if (heap==NULL) { heap = mi_heap_main(); } + heap->numa_node = (numa_node < 0 ? 
-1 : numa_node % _mi_os_numa_node_count()); +} + +void mi_heap_stats_merge_to_subproc(mi_heap_t* heap) { + if (heap==NULL) { heap = mi_heap_main(); } + _mi_stats_merge_into(&heap->subproc->stats, &heap->stats); +} + +void mi_heap_stats_merge_to_main(mi_heap_t* heap) { + if (heap==NULL) return; + _mi_stats_merge_into(&mi_heap_main()->stats, &heap->stats); +} + +static mi_theap_t* mi_heap_init_theap(const mi_heap_t* const_heap) +{ + mi_heap_t* heap = (mi_heap_t*)const_heap; + mi_assert_internal(heap!=NULL); + + if (_mi_is_heap_main(heap)) { + // this can be called if the (main) thread is not yet initialized (as no allocation happened) + mi_thread_init(); + mi_theap_t* theap = _mi_heap_theap(heap); + mi_assert_internal(theap!=NULL); + return theap; + } + + // otherwise initialize the theap for this heap + // get the thread local + mi_theap_t* theap = NULL; + if (heap->theap==0) { + // initialize thread locals + heap->theap = _mi_thread_local_create(); + if (heap->theap==0) { + _mi_error_message(EFAULT, "unable to dynamically create a thread local for a heap\n"); + return NULL; + } + } + else { + // get current thread local + theap = (mi_theap_t*)_mi_thread_local_get(heap->theap); + } + + // create a fresh theap? + if (theap==NULL) { + theap = _mi_theap_create(heap, _mi_theap_default_safe()->tld); + if (theap==NULL) { + _mi_error_message(EFAULT, "unable to allocate memory for a thread local heap\n"); + return NULL; + } + if (!_mi_thread_local_set(heap->theap, theap)) { + _mi_error_message(EFAULT, "unable to allocate memory for a thread local storage\n"); + return NULL; + } + } + return theap; +} + + +// get the theap for a heap without initializing (and return NULL in that case) +mi_theap_t* _mi_heap_theap_get_peek(const mi_heap_t* heap) { + if (heap==NULL || _mi_is_heap_main(heap)) { + return __mi_theap_main; // don't call _mi_theap_main as it may still be NULL + } + else { + return (mi_theap_t*)_mi_thread_local_get(heap->theap); + } +} + +// get (and possibly create) the theap belonging to a heap +mi_theap_t* _mi_heap_theap_get_or_init(const mi_heap_t* heap) +{ + mi_theap_t* theap = _mi_heap_theap_peek(heap); + if mi_unlikely(theap==NULL) { + theap = mi_heap_init_theap(heap); + if (theap==NULL) { return (mi_theap_t*)&_mi_theap_empty_wrong; } // this will return NULL from page.c:_mi_malloc_generic + } + _mi_theap_cached_set(theap); + return theap; +} + + +mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t exclusive_arena_id) { + // always allocate heap data in the (subprocess) main heap + mi_heap_t* heap_main = mi_heap_main(); + // todo: allocate heap data in the exclusive arena ? 
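For context, a minimal usage sketch of the heap construction and lookup routines defined in this file (continued just below). It assumes the long-standing `mi_heap_malloc` entry point from `mimalloc.h` and that a block allocated from a heap is attributed to that heap by `mi_heap_of`:

```c
#include <assert.h>
#include <mimalloc.h>

int main(void) {
  mi_heap_t* heap = mi_heap_new();       // equivalent to mi_heap_new_in_arena(0)
  assert(heap != NULL);

  void* p = mi_heap_malloc(heap, 128);   // allocate from this heap
  assert(mi_heap_contains(heap, p));
  assert(mi_heap_of(p) == heap);
  mi_free(p);                            // blocks are freed with the regular mi_free

  mi_heap_delete(heap);                  // remaining pages move to the main heap;
                                         // mi_heap_destroy would free them outright
  return 0;
}
```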
+ mi_heap_t* heap = (mi_heap_t*)mi_heap_zalloc( heap_main, sizeof(mi_heap_t) ); + if (heap==NULL) return NULL; + + // init fields + heap->subproc = heap_main->subproc; + heap->heap_seq = mi_atomic_increment_relaxed(&heap_main->subproc->heap_total_count); + heap->exclusive_arena = _mi_arena_from_id(exclusive_arena_id); + heap->numa_node = -1; // no initial affinity + + mi_lock_init(&heap->theaps_lock); + mi_lock_init(&heap->os_abandoned_pages_lock); + mi_lock_init(&heap->arena_pages_lock); + + // push onto the subproc heaps + mi_lock(&heap->subproc->heaps_lock) { + mi_heap_t* head = heap->subproc->heaps; + heap->prev = NULL; + heap->next = head; + if (head!=NULL) { head->prev = heap; } + heap->subproc->heaps = heap; + } + mi_atomic_increment_relaxed(&heap_main->subproc->heap_count); + mi_subproc_stat_increase(heap_main->subproc, heaps, 1); + return heap; +} + +mi_heap_t* mi_heap_new(void) { + return mi_heap_new_in_arena(0); +} + +// free the heap resources (assuming the pages are already moved/destroyed) +static void mi_heap_free(mi_heap_t* heap) { + mi_assert_internal(heap!=NULL && !_mi_is_heap_main(heap)); + + // free all theaps belonging to this heap + mi_theap_t* theap = NULL; + mi_lock(&heap->theaps_lock) { theap = heap->theaps; } + while(theap != NULL) { + mi_theap_t* next = NULL; + mi_lock(&heap->theaps_lock) { next = theap->hnext; } + _mi_theap_free(theap); + theap = next; + } + mi_lock(&heap->theaps_lock) { theap = heap->theaps; } + mi_assert_internal(theap==NULL); + + // free all arena pages infos + mi_lock(&heap->arena_pages_lock) { + for (size_t i = 0; i < MI_MAX_ARENAS; i++) { + mi_arena_pages_t* arena_pages = mi_atomic_load_relaxed(&heap->arena_pages[i]); + if (arena_pages!=NULL) { + mi_atomic_store_relaxed(&heap->arena_pages[i], NULL); + mi_free(arena_pages); + } + } + } + + // remove the heap from the subproc + mi_heap_stats_merge_to_main(heap); + mi_atomic_decrement_relaxed(&heap->subproc->heap_count); + mi_subproc_stat_decrease(heap->subproc, heaps, 1); + mi_lock(&heap->subproc->heaps_lock) { + if (heap->next!=NULL) { heap->next->prev = heap->prev; } + if (heap->prev!=NULL) { heap->prev->next = heap->next; } + else { heap->subproc->heaps = heap->next; } + } + + _mi_thread_local_free(heap->theap); + mi_lock_done(&heap->theaps_lock); + mi_lock_done(&heap->os_abandoned_pages_lock); + mi_free(heap); +} + +void mi_heap_delete(mi_heap_t* heap) { + if (heap==NULL) return; + if (_mi_is_heap_main(heap)) { + _mi_warning_message("cannot delete the main heap\n"); + return; + } + _mi_heap_move_pages(heap, mi_heap_main()); + mi_heap_free(heap); +} + +void mi_heap_destroy(mi_heap_t* heap) { + if (heap==NULL) return; + if (_mi_is_heap_main(heap)) { + _mi_warning_message("cannot destroy the main heap\n"); + return; + } + _mi_heap_destroy_pages(heap); + mi_heap_free(heap); +} + +mi_heap_t* mi_heap_of(void* p) { + mi_page_t* page = _mi_safe_ptr_page(p); + if (page==NULL) return NULL; + return mi_page_heap(page); +} + +bool mi_any_heap_contains(void* p) { + return (mi_heap_of(p)!=NULL); +} + +bool mi_heap_contains(mi_heap_t* heap, void* p) { + if (heap==NULL) { heap = mi_heap_main(); } + return (heap==mi_heap_of(p)); +} diff --git a/3rd/mimalloc-3.2.6/src/init.c b/3rd/mimalloc-3.2.6/src/init.c new file mode 100644 index 00000000..b779aebc --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/init.c @@ -0,0 +1,957 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen +This is free software; you can redistribute it 
and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" + +#include // memcpy, memset +#include // atexit + +#define MI_MEMID_INIT(kind) {{{NULL,0}}, kind, true /* pinned */, true /* committed */, false /* zero */ } +#define MI_MEMID_STATIC MI_MEMID_INIT(MI_MEM_STATIC) + +// Empty page used to initialize the small free pages array +const mi_page_t _mi_page_empty = { + MI_ATOMIC_VAR_INIT(0), // xthread_id + NULL, // free + 0, // used + 0, // capacity + 0, // reserved capacity + 0, // retire_expire + false, // is_zero + NULL, // local_free + MI_ATOMIC_VAR_INIT(0), // xthread_free + 0, // block_size + NULL, // page_start + #if (MI_PADDING || MI_ENCODE_FREELIST) + { 0, 0 }, // keys + #endif + NULL, // theap + NULL, // heap + NULL, NULL, // next, prev + MI_ARENA_SLICE_SIZE, // page_committed + MI_MEMID_STATIC // memid +}; + +#define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) + +#if (MI_PADDING>0) && (MI_INTPTR_SIZE >= 8) +#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } +#elif (MI_PADDING>0) +#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } +#else +#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY() } +#endif + + +// Empty page queues for every bin +#define QNULL(sz) { NULL, NULL, 0, (sz)*sizeof(uintptr_t) } +#define MI_PAGE_QUEUES_EMPTY \ + { QNULL(1), \ + QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \ + QNULL( 10), QNULL( 12), QNULL( 14), QNULL( 16), QNULL( 20), QNULL( 24), QNULL( 28), QNULL( 32), /* 16 */ \ + QNULL( 40), QNULL( 48), QNULL( 56), QNULL( 64), QNULL( 80), QNULL( 96), QNULL( 112), QNULL( 128), /* 24 */ \ + QNULL( 160), QNULL( 192), QNULL( 224), QNULL( 256), QNULL( 320), QNULL( 384), QNULL( 448), QNULL( 512), /* 32 */ \ + QNULL( 640), QNULL( 768), QNULL( 896), QNULL( 1024), QNULL( 1280), QNULL( 1536), QNULL( 1792), QNULL( 2048), /* 40 */ \ + QNULL( 2560), QNULL( 3072), QNULL( 3584), QNULL( 4096), QNULL( 5120), QNULL( 6144), QNULL( 7168), QNULL( 8192), /* 48 */ \ + QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \ + QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \ + QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), QNULL(393216), QNULL(458752), QNULL(524288), /* 72 */ \ + QNULL(MI_LARGE_MAX_OBJ_WSIZE + 1 /* 655360, Huge queue */), \ + QNULL(MI_LARGE_MAX_OBJ_WSIZE + 2) /* Full queue */ } + +#define MI_STAT_COUNT_NULL() {0,0,0} + +// Empty statistics +#define MI_STATS_NULL \ + MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ + { 0 }, { 0 }, \ + MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ + MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ + { 0 }, { 0 }, { 0 }, { 0 }, \ + { 0 }, { 0 }, { 0 }, { 0 }, \ + \ + { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, \ + MI_INIT5(MI_STAT_COUNT_NULL), \ + { 0 }, { 0 }, { 0 }, { 0 }, \ + \ + { MI_INIT4(MI_STAT_COUNT_NULL) }, \ + { { 0 }, { 0 }, { 0 }, { 0 } }, \ + \ + { MI_INIT74(MI_STAT_COUNT_NULL) }, \ + { MI_INIT74(MI_STAT_COUNT_NULL) }, \ + { MI_INIT5(MI_STAT_COUNT_NULL) } + +// 
-------------------------------------------------------- +// Statically allocate an empty theap as the initial +// thread local value for the default theap, +// and statically allocate the backing theap for the main +// thread so it can function without doing any allocation +// itself (as accessing a thread local for the first time +// may lead to allocation itself on some platforms) +// -------------------------------------------------------- + +static mi_decl_cache_align mi_subproc_t subproc_main +#if __cplusplus += { }; // empty initializer to prevent running the constructor (with msvc) +#else += { 0 }; // C zero initialize +#endif + +static mi_subproc_t* subprocs = &subproc_main; +static mi_lock_t subprocs_lock; + +static mi_decl_cache_align mi_tld_t tld_empty = { + 0, // thread_id + 0, // thread_seq + 0, // default numa node + &subproc_main, // subproc + NULL, // theaps list + false, // recurse + false, // is_in_threadpool + MI_MEMID_STATIC // memid +}; + +mi_decl_cache_align const mi_theap_t _mi_theap_empty = { + &tld_empty, // tld + NULL, // heap + 0, // heartbeat + 0, // cookie + { {0}, {0}, 0, true }, // random + 0, // page count + MI_BIN_FULL, 0, // page retired min/max + 0, 0, // generic count + NULL, NULL, // tnext, tprev + NULL, NULL, // hnext, hprev + 0, // full page retain + false, // allow reclaim + true, // allow abandon + #if MI_GUARDED + 0, 0, 0, 1, // sample count is 1 so we never write to it (see `internal.h:mi_theap_malloc_use_guarded`) + #endif + MI_SMALL_PAGES_EMPTY, + MI_PAGE_QUEUES_EMPTY, + MI_MEMID_STATIC, + { MI_STAT_VERSION, MI_STATS_NULL }, // stats +}; + +mi_decl_cache_align const mi_theap_t _mi_theap_empty_wrong = { + &tld_empty, // tld + NULL, // heap + 0, // heartbeat + 0, // cookie + { {0}, {0}, 0, true }, // random + 0, // page count + MI_BIN_FULL, 0, // page retired min/max + 0, 0, // generic count + NULL, NULL, // tnext, tprev + NULL, NULL, // hnext, hprev + 0, // full page retain + false, // allow reclaim + true, // allow abandon + #if MI_GUARDED + 0, 0, 0, 1, // sample count is 1 so we never write to it (see `internal.h:mi_theap_malloc_use_guarded`) + #endif + MI_SMALL_PAGES_EMPTY, + MI_PAGE_QUEUES_EMPTY, + MI_MEMID_STATIC, + { MI_STAT_VERSION, MI_STATS_NULL }, // stats +}; + +// Heap for the main thread + +extern mi_decl_hidden mi_decl_cache_align mi_theap_t theap_main; +extern mi_decl_hidden mi_decl_cache_align mi_heap_t heap_main; + +static mi_decl_cache_align mi_tld_t tld_main = { + 0, // thread_id + 0, // thread_seq + 0, // numa node + &subproc_main, // subproc + &theap_main, // theaps list + false, // recurse + false, // is_in_threadpool + MI_MEMID_STATIC // memid +}; + +mi_decl_cache_align mi_theap_t theap_main = { + &tld_main, // thread local data + &heap_main, // main heap + 0, // heartbeat + 0, // initial cookie + { {0x846ca68b}, {0}, 0, true }, // random + 0, // page count + MI_BIN_FULL, 0, // page retired min/max + 0, 0, // generic count + NULL, NULL, // tnext, tprev + NULL, NULL, // hnext, hprev + 2, // full page retain + true, // allow page reclaim + true, // allow page abandon + #if MI_GUARDED + 0, 0, 0, 0, + #endif + MI_SMALL_PAGES_EMPTY, + MI_PAGE_QUEUES_EMPTY, + MI_MEMID_STATIC, + { MI_STAT_VERSION, MI_STATS_NULL }, // stats +}; + +mi_decl_cache_align mi_heap_t heap_main +#if __cplusplus + = { }; // empty initializer to prevent running the constructor (with msvc) +#else + = { 0 }; // C zero initialize +#endif + +mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { + return _mi_prim_thread_id(); +} + +// the theap belonging to the 
main heap +mi_decl_hidden mi_decl_thread mi_theap_t* __mi_theap_main = NULL; + +#if MI_TLS_MODEL_THREAD_LOCAL +// the thread-local main theap for allocation +mi_decl_hidden mi_decl_thread mi_theap_t* __mi_theap_default = (mi_theap_t*)&_mi_theap_empty; +// the last used non-main theap +mi_decl_hidden mi_decl_thread mi_theap_t* __mi_theap_cached = (mi_theap_t*)&_mi_theap_empty; +#endif + +bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`. + +mi_stats_t _mi_stats_main = { MI_STAT_VERSION, MI_STATS_NULL }; + +#if MI_GUARDED +mi_decl_export void mi_theap_guarded_set_sample_rate(mi_theap_t* theap, size_t sample_rate, size_t seed) { + theap->guarded_sample_rate = sample_rate; + theap->guarded_sample_count = sample_rate; // count down samples + if (theap->guarded_sample_rate > 1) { + if (seed == 0) { + seed = _mi_theap_random_next(theap); + } + theap->guarded_sample_count = (seed % theap->guarded_sample_rate) + 1; // start at random count between 1 and `sample_rate` + } +} + +mi_decl_export void mi_theap_guarded_set_size_bound(mi_theap_t* theap, size_t min, size_t max) { + theap->guarded_size_min = min; + theap->guarded_size_max = (min > max ? min : max); +} + +void _mi_theap_guarded_init(mi_theap_t* theap) { + mi_theap_guarded_set_sample_rate(theap, + (size_t)mi_option_get_clamp(mi_option_guarded_sample_rate, 0, LONG_MAX), + (size_t)mi_option_get(mi_option_guarded_sample_seed)); + mi_theap_guarded_set_size_bound(theap, + (size_t)mi_option_get_clamp(mi_option_guarded_min, 0, LONG_MAX), + (size_t)mi_option_get_clamp(mi_option_guarded_max, 0, LONG_MAX) ); +} +#else +mi_decl_export void mi_theap_guarded_set_sample_rate(mi_theap_t* theap, size_t sample_rate, size_t seed) { + MI_UNUSED(theap); MI_UNUSED(sample_rate); MI_UNUSED(seed); +} + +mi_decl_export void mi_theap_guarded_set_size_bound(mi_theap_t* theap, size_t min, size_t max) { + MI_UNUSED(theap); MI_UNUSED(min); MI_UNUSED(max); +} +void _mi_theap_guarded_init(mi_theap_t* theap) { + MI_UNUSED(theap); +} +#endif + +/* ----------------------------------------------------------- + Initialization + Note: on some platforms lock_init or just a thread local access + can cause allocation and induce recursion during initialization. +----------------------------------------------------------- */ + + +// Initialize main subproc +static void mi_subproc_main_init(void) { + if (subproc_main.memid.memkind != MI_MEM_STATIC) { + subproc_main.memid = _mi_memid_create(MI_MEM_STATIC); + subproc_main.heaps = &heap_main; + subproc_main.heap_total_count = 1; + subproc_main.heap_count = 1; + mi_atomic_store_release(&subproc_main.heap_main, &heap_main); + __mi_stat_increase_mt(&subproc_main.stats.heaps, 1); + __mi_stat_increase_mt(&subproc_main.stats.threads, 1); + mi_lock_init(&subproc_main.arena_reserve_lock); + mi_lock_init(&subproc_main.heaps_lock); + mi_lock_init(&subprocs_lock); + } +} + +// Initialize main tld +static void mi_tld_main_init(void) { + if (tld_main.thread_id == 0) { + tld_main.thread_id = _mi_prim_thread_id(); + } +} + +void _mi_theap_options_init(mi_theap_t* theap) { + theap->allow_page_reclaim = (mi_option_get(mi_option_page_reclaim_on_free) >= 0); + theap->allow_page_abandon = (mi_option_get(mi_option_page_full_retain) >= 0); + theap->page_full_retain = mi_option_get_clamp(mi_option_page_full_retain, -1, 32); +} + +// Initialization of the (statically allocated) main theap, and the main tld and subproc. 
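The statically allocated `_mi_theap_empty` and `theap_main` structures above exist so that a thread-local lookup never yields `NULL` and the slow initialization path is taken at most once per thread. A distilled, generic illustration of that sentinel-plus-lazy-init pattern (the names here are illustrative, not mimalloc's):

```c
#include <stdio.h>

typedef struct ctx_s { int initialized; int value; } ctx_t;

// statically allocated sentinel: always safe to read, never written to
static const ctx_t ctx_empty = { 0, 0 };

// every thread starts out pointing at the sentinel instead of NULL
static _Thread_local ctx_t* ctx_default = (ctx_t*)&ctx_empty;

static ctx_t ctx_real;   // stand-in for a properly allocated per-thread context

static ctx_t* ctx_get(void) {
  if (ctx_default->initialized) return ctx_default;  // fast path: no NULL check needed
  // slow path: set up the real context once and swap it in
  ctx_real.initialized = 1;
  ctx_real.value = 42;
  ctx_default = &ctx_real;
  return ctx_default;
}

int main(void) {
  printf("value = %d\n", ctx_get()->value);
  return 0;
}
```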
+static void mi_theap_main_init(void) { + if mi_unlikely(theap_main.memid.memkind != MI_MEM_STATIC) { + // theap + theap_main.memid = _mi_memid_create(MI_MEM_STATIC); + #if defined(__APPLE__) || defined(_WIN32) && !defined(MI_SHARED_LIB) + _mi_random_init_weak(&theap_main.random); // prevent allocation failure during bcrypt dll initialization with static linking (issue #1185) + #else + _mi_random_init(&theap_main.random); + #endif + theap_main.cookie = _mi_theap_random_next(&theap_main); + _mi_theap_options_init(&theap_main); + _mi_theap_guarded_init(&theap_main); + } +} + +// Initialize main heap +static void mi_heap_main_init(void) { + if mi_unlikely(heap_main.subproc == NULL) { + heap_main.subproc = &subproc_main; + heap_main.theaps = &theap_main; + + mi_theap_main_init(); + mi_subproc_main_init(); + mi_tld_main_init(); + + mi_lock_init(&heap_main.theaps_lock); + mi_lock_init(&heap_main.os_abandoned_pages_lock); + mi_lock_init(&heap_main.arena_pages_lock); + } +} + + +/* ----------------------------------------------------------- + Thread local data +----------------------------------------------------------- */ + +// Allocate fresh tld +static mi_tld_t* mi_tld_alloc(void) { + if (_mi_is_main_thread()) { + mi_atomic_increment_relaxed(&tld_main.subproc->thread_count); + return &tld_main; + } + else { + // allocate tld meta-data + // note: we need to be careful to not access the tld from `_mi_meta_zalloc` + // (and in turn from `_mi_arena_alloc_aligned` and `_mi_os_alloc_aligned`). + mi_memid_t memid; + mi_tld_t* tld = (mi_tld_t*)_mi_meta_zalloc(sizeof(mi_tld_t), &memid); + if (tld==NULL) { + _mi_error_message(ENOMEM, "unable to allocate memory for thread local data\n"); + return NULL; + } + tld->memid = memid; + tld->theaps = NULL; + tld->subproc = &subproc_main; + tld->numa_node = _mi_os_numa_node(); + tld->thread_id = _mi_prim_thread_id(); + tld->thread_seq = mi_atomic_increment_relaxed(&tld->subproc->thread_total_count); + tld->is_in_threadpool = _mi_prim_thread_is_in_threadpool(); + mi_atomic_increment_relaxed(&tld->subproc->thread_count); + return tld; + } +} + +#define MI_TLD_INVALID ((mi_tld_t*)1) + +mi_decl_noinline static void mi_tld_free(mi_tld_t* tld) { + if (tld != NULL && tld != MI_TLD_INVALID) { + mi_atomic_decrement_relaxed(&tld->subproc->thread_count); + _mi_meta_free(tld, sizeof(mi_tld_t), tld->memid); + } + #if 0 + // do not read/write to `thread_tld` on older macOS <= 14 as that will re-initialize the thread local storage + // (since we are calling this during pthread shutdown) + // (and this could happen on other systems as well, so let's never do it) + thread_tld = MI_TLD_INVALID; + #endif +} + +// return the thread local heap ensuring it is initialized (and not `NULL` or `&_mi_theap_empty`); +mi_theap_t* _mi_theap_default_safe(void) { + mi_theap_t* theap = _mi_theap_default(); + if mi_likely(mi_theap_is_initialized(theap)) return theap; + mi_thread_init(); + mi_assert_internal(mi_theap_is_initialized(_mi_theap_default())); + return _mi_theap_default(); +} + + +mi_subproc_t* _mi_subproc_main(void) { + return &subproc_main; +} + +mi_subproc_t* _mi_subproc(void) { + // should work without doing initialization (as it may be called from `_mi_tld -> mi_tld_alloc ... -> os_alloc -> _mi_subproc()` + // todo: this will still fail on OS systems where the first access to a thread-local causes allocation. 
+ // on such systems we can check for this with the _mi_prim_get_default_theap as those are protected (by being + // stored in a TLS slot for example) + mi_theap_t* theap = _mi_theap_default(); + if (theap == NULL) { + return _mi_subproc_main(); + } + else { + return theap->tld->subproc; // avoid using thread local storage (`thread_tld`) + } +} + +mi_heap_t* _mi_subproc_heap_main(mi_subproc_t* subproc) { + mi_heap_t* heap = mi_atomic_load_relaxed(&subproc->heap_main); + if mi_likely(heap!=NULL) { + return heap; + } + else { + mi_heap_main_init(); + mi_assert_internal(mi_atomic_load_relaxed(&subproc->heap_main) != NULL); + return mi_atomic_load_relaxed(&subproc->heap_main); + } +} + +mi_heap_t* mi_heap_main(void) { + return _mi_subproc_heap_main(_mi_subproc()); // don't use _mi_theap_main() so this call works during process_init +} + +bool _mi_is_heap_main(const mi_heap_t* heap) { + mi_assert_internal(heap!=NULL); + return (_mi_subproc_heap_main(heap->subproc) == heap); +} + +/* ----------------------------------------------------------- + Sub process +----------------------------------------------------------- */ + +mi_subproc_id_t mi_subproc_main(void) { + return _mi_subproc_main(); +} + +mi_subproc_id_t mi_subproc_current(void) { + return _mi_subproc(); +} + +mi_subproc_id_t mi_subproc_new(void) { + static _Atomic(size_t) subproc_total_count; + mi_memid_t memid; + mi_subproc_t* subproc = (mi_subproc_t*)_mi_meta_zalloc(sizeof(mi_subproc_t),&memid); + if (subproc == NULL) return NULL; + subproc->memid = memid; + subproc->subproc_seq = mi_atomic_increment_relaxed(&subproc_total_count) + 1; + mi_lock_init(&subproc->arena_reserve_lock); + mi_lock_init(&subproc->heaps_lock); + mi_lock(&subprocs_lock) { + // push on subproc list + subproc->next = subprocs; + if (subprocs!=NULL) { subprocs->prev = subproc; } + subprocs = subproc; + } + return subproc; +} + +mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id) { + return (subproc_id == NULL ? &subproc_main : (mi_subproc_t*)subproc_id); +} + +// destroy all subproc resources including arena's, heap's etc. +static void mi_subproc_unsafe_destroy(mi_subproc_t* subproc) +{ + // remove from the subproc list + mi_lock(&subprocs_lock) { + if (subproc->next!=NULL) { subproc->next->prev = subproc->prev; } + if (subproc->prev!=NULL) { subproc->prev->next = subproc->next; } + else { mi_assert_internal(subprocs==subproc); subprocs = subproc->next; } + } + + // destroy all subproc heaps + mi_lock(&subproc->heaps_lock) { + mi_heap_t* heap = subproc->heaps; + while (heap != NULL) { + mi_heap_t* next = heap->next; + if (heap!=subproc->heap_main) {mi_heap_destroy(heap); } + heap = next; + } + mi_assert_internal(subproc->heaps == subproc->heap_main); + mi_heap_destroy(subproc->heap_main); + } + + // merge stats back into the main subproc? + if (subproc!=&subproc_main) { + _mi_arenas_unsafe_destroy_all(subproc); + _mi_stats_merge_into(&subproc_main.stats, &subproc->stats); + + // safe to release + // todo: should we refcount subprocesses? 
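The sub-process API in this file (`mi_subproc_new`, `mi_subproc_add_current_thread`, `mi_subproc_destroy`) groups threads so their memory can be accounted for and released together. A hedged usage sketch, assuming pthreads and with error handling omitted:

```c
#include <pthread.h>
#include <mimalloc.h>

// allocations made by this thread are accounted to the sub-process it registers with
static void* worker(void* arg) {
  mi_subproc_id_t sub = (mi_subproc_id_t)arg;
  mi_subproc_add_current_thread(sub);  // register before allocating, while still in the main sub-process
  void* p = mi_malloc(64 * 1024);
  mi_free(p);
  return NULL;
}

int main(void) {
  mi_subproc_id_t sub = mi_subproc_new();
  pthread_t t;
  pthread_create(&t, NULL, worker, sub);
  pthread_join(t, NULL);
  mi_subproc_destroy(sub);             // release everything still owned by the sub-process
  return 0;
}
```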
+ mi_lock_done(&subproc->arena_reserve_lock); + mi_lock_done(&subproc->heaps_lock); + _mi_meta_free(subproc, sizeof(mi_subproc_t), subproc->memid); + } +} + +void mi_subproc_destroy(mi_subproc_id_t subproc_id) { + if (subproc_id == NULL) return; + mi_subproc_unsafe_destroy(_mi_subproc_from_id(subproc_id)); +} + +static void mi_subprocs_unsafe_destroy_all(void) { + mi_lock(&subprocs_lock) { + mi_subproc_t* subproc = subprocs; + while (subproc!=NULL) { + mi_subproc_t* next = subproc->next; + if (subproc!=&subproc_main) { + mi_subproc_unsafe_destroy(subproc); + } + subproc = next; + } + } + mi_subproc_unsafe_destroy(&subproc_main); +} + + +void mi_subproc_add_current_thread(mi_subproc_id_t subproc_id) { + mi_subproc_t* subproc = _mi_subproc_from_id(subproc_id); + mi_tld_t* const tld = _mi_theap_default_safe()->tld; + mi_assert(tld->subproc== &subproc_main); + if (tld->subproc != &subproc_main) { + _mi_warning_message("unable to add thread to the subprocess as it was already in another subprocess (id: %p)\n", subproc); + return; + } + tld->subproc = subproc; + tld->thread_seq = mi_atomic_increment_relaxed(&subproc->thread_total_count); + mi_atomic_decrement_relaxed(&subproc_main.thread_count); + mi_atomic_increment_relaxed(&subproc->thread_count); +} + + +bool mi_subproc_visit_heaps(mi_subproc_id_t subproc_id, mi_heap_visit_fun* visitor, void* arg) { + mi_subproc_t* subproc = _mi_subproc_from_id(subproc_id); + if (subproc==NULL) return false; + bool ok = true; + mi_lock(&subproc->heaps_lock) { + for (mi_heap_t* heap = subproc->heaps; heap!=NULL && ok; heap = heap->next) { + ok = (*visitor)(heap, arg); + } + } + return ok; +} + + +/* ----------------------------------------------------------- + Allocate theap data +----------------------------------------------------------- */ + +// Initialize the thread local default theap, called from `mi_thread_init` +static mi_theap_t* _mi_thread_init_theap_default(void) { + mi_theap_t* theap = _mi_theap_default(); + if (mi_theap_is_initialized(theap)) return theap; + if (_mi_is_main_thread()) { + mi_heap_main_init(); + theap = &theap_main; + } + else { + // allocates tld data + // note: we cannot access thread-locals yet as that can cause (recursive) allocation + // (on macOS <= 14 for example where the loader allocates thread-local data on demand). + mi_tld_t* tld = mi_tld_alloc(); + // allocate and initialize the theap for the main heap + theap = _mi_theap_create(mi_heap_main(), tld); + } + // associate the theap with this thread + // (this is safe, on macOS for example, the theap is set in a dedicated TLS slot and thus does not cause recursive allocation) + _mi_theap_default_set(theap); + mi_assert_internal(_mi_theap_main()==theap); + return theap; +} + + +// Free the thread local theaps +static void mi_thread_theaps_done(mi_tld_t* tld) +{ + // reset the thread local theaps + __mi_theap_main = NULL; + _mi_theap_default_set((mi_theap_t*)&_mi_theap_empty); + _mi_theap_cached_set((mi_theap_t*)&_mi_theap_empty); + + // delete all theaps in this thread + mi_theap_t* curr = tld->theaps; + while (curr != NULL) { + mi_theap_t* next = curr->tnext; // save `tnext` as `curr` will be freed + // never destroy theaps; if a dll is linked statically with mimalloc, + // there may still be delete/free calls after the mi_fls_done is called. 
Issue #207 + _mi_theap_delete(curr); + curr = next; + } + mi_assert(_mi_theap_default()==(mi_theap_t*)&_mi_theap_empty); // careful to not re-initialize the default theap during theap_delete + mi_assert(!mi_theap_is_initialized(_mi_theap_default())); +} + + + +// -------------------------------------------------------- +// Try to run `mi_thread_done()` automatically so any memory +// owned by the thread but not yet released can be abandoned +// and re-owned by another thread. +// +// 1. windows dynamic library: +// call from DllMain on DLL_THREAD_DETACH +// 2. windows static library: +// use special linker section to call a destructor when the thread is done +// 3. unix, pthreads: +// use a pthread key to call a destructor when a pthread is done +// +// In the last two cases we also need to call `mi_process_init` +// to set up the thread local keys. +// -------------------------------------------------------- + +// Set up handlers so `mi_thread_done` is called automatically +static void mi_process_setup_auto_thread_done(void) { + static bool tls_initialized = false; // fine if it races + if (tls_initialized) return; + tls_initialized = true; + _mi_prim_thread_init_auto_done(); + _mi_theap_default_set(&theap_main); +} + + +bool _mi_is_main_thread(void) { + return (tld_main.thread_id==0 || tld_main.thread_id == _mi_thread_id()); +} + + +// Initialize thread +void mi_thread_init(void) mi_attr_noexcept +{ + // ensure our process has started already + mi_process_init(); + if (mi_theap_is_initialized(_mi_theap_default())) return; + + // initialize the default theap + _mi_thread_init_theap_default(); + + mi_heap_stat_increase(mi_heap_main(), threads, 1); + //_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id()); +} + +void mi_thread_done(void) mi_attr_noexcept { + _mi_thread_done(NULL); +} + +void _mi_thread_done(mi_theap_t* _theap_main) +{ + // NULL can be passed on some platforms + if (_theap_main==NULL) { + _theap_main = __mi_theap_main; + } + + // prevent re-entrancy through theap_done/theap_set_default_direct (issue #699) + if (!mi_theap_is_initialized(_theap_main)) { + return; + } + + // release dynamic thread_local's + _mi_thread_locals_thread_done(); + + // note: we store the tld as we should avoid reading `thread_tld` at this point (to avoid reinitializing the thread local storage) + mi_tld_t* const tld = _theap_main->tld; + + // adjust stats + mi_heap_stat_decrease(_mi_subproc_heap_main(tld->subproc), threads, 1); // todo: or `_theap_main->heap`? + + // check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local theaps... 
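Mechanism 3 above (a pthread key whose destructor runs at thread exit) is the portable way `mi_thread_done` gets invoked automatically. A distilled illustration of just that mechanism, separate from mimalloc's actual prim layer:

```c
#include <pthread.h>
#include <stdio.h>

static pthread_key_t done_key;

// runs automatically when a thread that set the key exits
static void on_thread_done(void* value) {
  (void)value;
  printf("thread finished, destructor ran\n");
}

static void* worker(void* arg) {
  (void)arg;
  // a non-NULL value arms the destructor for this thread
  pthread_setspecific(done_key, (void*)1);
  return NULL;
}

int main(void) {
  pthread_key_create(&done_key, &on_thread_done);
  pthread_t t;
  pthread_create(&t, NULL, worker, NULL);
  pthread_join(t, NULL);
  return 0;
}
```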
+ if (tld->thread_id != _mi_prim_thread_id()) return; + + // delete the thread local theaps + mi_thread_theaps_done(tld); + + // free thread local data + mi_tld_free(tld); +} + + +mi_decl_cold mi_decl_noinline mi_theap_t* _mi_theap_empty_get(void) { + return (mi_theap_t*)&_mi_theap_empty; +} + +#if MI_TLS_MODEL_DYNAMIC_WIN32 + +// only for win32 for now +#if MI_SIZE_SIZE==4 +#define MI_TLS_USER_BASE (0x0E10 / MI_SIZE_SIZE) +#else +#define MI_TLS_USER_BASE (0x1480 / MI_SIZE_SIZE) +#endif +#define MI_TLS_USER_LAST_SLOT (MI_TLS_USER_BASE + 63) + +// we initially use the last user slot so NULL is returned +// when allocating a slot, we check we get a slot before the last one (so it wasn't used yet) +mi_decl_hidden size_t _mi_theap_default_slot = MI_TLS_USER_LAST_SLOT; +mi_decl_hidden size_t _mi_theap_cached_slot = MI_TLS_USER_LAST_SLOT; + +mi_decl_cold mi_theap_t* _mi_tls_slots_init(void) { + static mi_atomic_once_t tls_slots_init; + if (mi_atomic_once(&tls_slots_init)) { + _mi_theap_default_slot = TlsAlloc() + MI_TLS_USER_BASE; + _mi_theap_cached_slot = TlsAlloc() + MI_TLS_USER_BASE; + if (_mi_theap_cached_slot >= MI_TLS_USER_LAST_SLOT) { + _mi_error_message(EFAULT, "unable to allocate fast TLS user slot (0x%zx)\n", _mi_theap_cached_slot); + } + } + return (mi_theap_t*)&_mi_theap_empty; +} + +#elif MI_TLS_MODEL_DYNAMIC_PTHREADS + +// only for pthreads for now +mi_decl_hidden pthread_key_t _mi_theap_default_key = 0; +mi_decl_hidden pthread_key_t _mi_theap_cached_key = 0; + +mi_decl_cold mi_theap_t* _mi_tls_keys_init(void) { + static mi_atomic_once_t tls_keys_init; + if (mi_atomic_once(&tls_keys_init)) { + pthread_key_create(&_mi_theap_default_key, NULL); + pthread_key_create(&_mi_theap_cached_key, NULL); + } + return (mi_theap_t*)&_mi_theap_empty; +} + +#endif + +void _mi_theap_cached_set(mi_theap_t* theap) { + #if MI_TLS_MODEL_THREAD_LOCAL + __mi_theap_cached = theap; + #elif MI_TLS_MODEL_FIXED_SLOT + mi_prim_tls_slot_set(MI_TLS_MODEL_FIXED_SLOT_CACHED, theap); + #elif MI_TLS_MODEL_DYNAMIC_WIN32 + _mi_tls_slots_init(); + mi_prim_tls_slot_set(_mi_theap_cached_slot, theap); + #elif MI_TLS_MODEL_DYNAMIC_PTHREADS + _mi_tls_keys_init(); + if (_mi_theap_cached_key!=0) pthread_setspecific(_mi_theap_cached_key, theap); + #endif +} + +void _mi_theap_default_set(mi_theap_t* theap) { + mi_assert_internal(theap != NULL); + mi_assert_internal(theap->tld->thread_id==0 || theap->tld->thread_id==_mi_thread_id()); + #if MI_TLS_MODEL_THREAD_LOCAL + __mi_theap_default = theap; + #elif MI_TLS_MODEL_FIXED_SLOT + mi_prim_tls_slot_set(MI_TLS_MODEL_FIXED_SLOT_DEFAULT, theap); + #elif MI_TLS_MODEL_DYNAMIC_WIN32 + _mi_tls_slots_init(); + mi_prim_tls_slot_set(_mi_theap_default_slot, theap); + #elif MI_TLS_MODEL_DYNAMIC_PTHREADS + _mi_tls_keys_init(); + if (_mi_theap_default_key!=0) pthread_setspecific(_mi_theap_default_key, theap); + #endif + + // set theap main if needed + if (mi_theap_is_initialized(theap)) { + // ensure the default theap is passed to `_mi_thread_done` as on some platforms we cannot access TLS at thread termination (as it would allocate again) + _mi_prim_thread_associate_default_theap(theap); + if (_mi_is_heap_main(theap->heap)) { + __mi_theap_main = theap; + } + } +} + +void mi_thread_set_in_threadpool(void) mi_attr_noexcept { + mi_theap_t* theap = _mi_theap_default_safe(); + theap->tld->is_in_threadpool = true; +} + +// -------------------------------------------------------- +// Run functions on process init/done, and thread init/done +// -------------------------------------------------------- 
+static bool os_preloading = true; // true until this module is initialized + +// Returns true if this module has not been initialized; Don't use C runtime routines until it returns false. +bool mi_decl_noinline _mi_preloading(void) { + return os_preloading; +} + +// Returns true if mimalloc was redirected +mi_decl_nodiscard bool mi_is_redirected(void) mi_attr_noexcept { + return _mi_is_redirected(); +} + +// Called once by the process loader from `src/prim/prim.c` +void _mi_auto_process_init(void) { + // mi_heap_main_init(); + // #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) + // volatile mi_theap_t* dummy = __mi_theap_default; // access TLS to allocate it before setting tls_initialized to true; + // if (dummy == NULL) return; // use dummy or otherwise the access may get optimized away (issue #697) + // #endif + + os_preloading = false; + mi_assert_internal(_mi_is_main_thread()); + + mi_process_init(); + mi_process_setup_auto_thread_done(); + _mi_thread_locals_init(); + _mi_options_post_init(); // now we can print to stderr + if (_mi_is_redirected()) _mi_verbose_message("malloc is redirected.\n"); + + // show message from the redirector (if present) + const char* msg = NULL; + _mi_allocator_init(&msg); + if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) { + _mi_fputs(NULL,NULL,NULL,msg); + } + + // reseed random + _mi_random_reinit_if_weak(&theap_main.random); +} + +// CPU features +mi_decl_cache_align bool _mi_cpu_has_fsrm = false; +mi_decl_cache_align bool _mi_cpu_has_erms = false; +mi_decl_cache_align bool _mi_cpu_has_popcnt = false; + +#if (MI_ARCH_X64 || MI_ARCH_X86) +#if defined(__GNUC__) +#include +static bool mi_cpuid(uint32_t* regs4, uint32_t level) { + return (__get_cpuid(level, ®s4[0], ®s4[1], ®s4[2], ®s4[3]) == 1); +} + +#elif defined(_MSC_VER) +static bool mi_cpuid(uint32_t* regs4, uint32_t level) { + __cpuid((int32_t*)regs4, (int32_t)level); + return true; +} +#else +static bool mi_cpuid(uint32_t* regs4, uint32_t level) { + MI_UNUSED(regs4); MI_UNUSED(level); + return false; +} +#endif + +static void mi_detect_cpu_features(void) { + // FSRM for fast short rep movsb/stosb support (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017)) + // EMRS for fast enhanced rep movsb/stosb support + uint32_t cpu_info[4]; + if (mi_cpuid(cpu_info, 7)) { + _mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see + _mi_cpu_has_erms = ((cpu_info[1] & (1 << 9)) != 0); // bit 9 of EBX : see + } + if (mi_cpuid(cpu_info, 1)) { + _mi_cpu_has_popcnt = ((cpu_info[2] & (1 << 23)) != 0); // bit 23 of ECX : see + } +} + +#else +static void mi_detect_cpu_features(void) { + #if MI_ARCH_ARM64 + _mi_cpu_has_popcnt = true; + #endif +} +#endif + + +// Initialize the process; called by thread_init or the process loader +void mi_process_init(void) mi_attr_noexcept { + // ensure we are called once + static mi_atomic_once_t process_init; + // #if _MSC_VER < 1920 + // mi_heap_main_init(); // vs2017 can dynamically re-initialize theap_main + // #endif + if (!mi_atomic_once(&process_init)) return; + _mi_process_is_initialized = true; + _mi_verbose_message("process init: 0x%zx\n", _mi_thread_id()); + + mi_detect_cpu_features(); + _mi_options_init(); + _mi_stats_init(); + _mi_os_init(); + // the following can potentially allocate (on freeBSD for pthread keys) + // todo: do 2-phase so we can use stats at first, then later init the keys? 
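The huge-page and OS-memory reservations that `mi_process_init` performs a few lines further down (driven by `mi_option_reserve_huge_os_pages` and `mi_option_reserve_os_memory`) can also be requested directly through the public API. A hedged sketch:

```c
#include <stdbool.h>
#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  // reserve 256 MiB of OS memory up front (committed, large OS pages allowed)
  if (mi_reserve_os_memory((size_t)256 << 20, true /* commit */, true /* allow large */) != 0) {
    fprintf(stderr, "OS memory reservation failed\n");
  }
  // try to reserve 2 x 1GiB huge pages interleaved over the available NUMA nodes,
  // with the same pages*500ms timeout that mi_process_init uses
  if (mi_reserve_huge_os_pages_interleave(2, 0, 2 * 500) != 0) {
    fprintf(stderr, "huge page reservation failed (may need OS configuration)\n");
  }
  void* p = mi_malloc(1024);
  mi_free(p);
  return 0;
}
```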
+ mi_heap_main_init(); // before page_map_init so stats are working + _mi_page_map_init(); // todo: this could fail.. should we abort in that case? + mi_thread_init(); + + #if defined(_WIN32) && defined(MI_WIN_USE_FLS) + // On windows, when building as a static lib the FLS cleanup happens to early for the main thread. + // To avoid this, set the FLS value for the main thread to NULL so the fls cleanup + // will not call _mi_thread_done on the (still executing) main thread. See issue #508. + _mi_prim_thread_associate_default_theap(NULL); + #endif + + // mi_stats_reset(); // only call stat reset *after* thread init (or the theap tld == NULL) + mi_track_init(); + if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { + size_t pages = mi_option_get_clamp(mi_option_reserve_huge_os_pages, 0, 128*1024); + int reserve_at = (int)mi_option_get_clamp(mi_option_reserve_huge_os_pages_at, -1, INT_MAX); + if (reserve_at != -1) { + mi_reserve_huge_os_pages_at(pages, reserve_at, pages*500); + } else { + mi_reserve_huge_os_pages_interleave(pages, 0, pages*500); + } + } + if (mi_option_is_enabled(mi_option_reserve_os_memory)) { + long ksize = mi_option_get(mi_option_reserve_os_memory); + if (ksize > 0) { + mi_reserve_os_memory((size_t)ksize*MI_KiB, true, true); + } + } +} + +// Called when the process is done (cdecl as it is used with `at_exit` on some platforms) +void mi_cdecl mi_process_done(void) mi_attr_noexcept { + // only shutdown if we were initialized + if (!_mi_process_is_initialized) return; + // ensure we are called once + static bool process_done = false; + if (process_done) return; + process_done = true; + + mi_assert_internal(_mi_theap_default() != NULL); + _mi_thread_locals_done(); + + // release any thread specific resources and ensure _mi_thread_done is called on all but the main thread + _mi_prim_thread_done_auto_done(); + + #ifndef MI_SKIP_COLLECT_ON_EXIT + #if (MI_DEBUG || !defined(MI_SHARED_LIB)) + // free all memory if possible on process exit. This is not needed for a stand-alone process + // but should be done if mimalloc is statically linked into another shared library which + // is repeatedly loaded/unloaded, see issue #281. + mi_theap_collect(_mi_theap_default(), true /* force */); + #endif + #endif + + // Forcefully release all retained memory; this can be dangerous in general if overriding regular malloc/free + // since after process_done there might still be other code running that calls `free` (like at_exit routines, + // or C-runtime termination code. 
+ if (mi_option_is_enabled(mi_option_destroy_on_exit)) { + mi_subprocs_unsafe_destroy_all(); + _mi_page_map_unsafe_destroy(_mi_subproc_main()); + } + else { + mi_heap_stats_merge_to_subproc(mi_heap_main()); + } + //_mi_page_map_unsafe_destroy(_mi_subproc_main()); + + if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) { + mi_subproc_stats_print_out(NULL, NULL, NULL); + } + mi_lock_done(&subprocs_lock); + _mi_allocator_done(); + _mi_verbose_message("process done: 0x%zx\n", tld_main.thread_id); + os_preloading = true; // don't call the C runtime anymore +} + +void mi_cdecl _mi_auto_process_done(void) mi_attr_noexcept { + if (_mi_option_get_fast(mi_option_destroy_on_exit)>1) return; + mi_process_done(); +} diff --git a/3rd/mimalloc-3.2.6/src/libc.c b/3rd/mimalloc-3.2.6/src/libc.c new file mode 100644 index 00000000..4c891c1e --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/libc.c @@ -0,0 +1,432 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2024, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// -------------------------------------------------------- +// This module defines various std libc functions to reduce +// the dependency on libc, and also prevent errors caused +// by some libc implementations when called before `main` +// executes (due to malloc redirection) +// -------------------------------------------------------- + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" // mi_prim_getenv + +char _mi_toupper(char c) { + if (c >= 'a' && c <= 'z') return (c - 'a' + 'A'); + else return c; +} + +int _mi_strnicmp(const char* s, const char* t, size_t n) { + if (n == 0) return 0; + for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) { + if (_mi_toupper(*s) != _mi_toupper(*t)) break; + } + return (n == 0 ? 
0 : *s - *t);
+}
+
+void _mi_strlcpy(char* dest, const char* src, size_t dest_size) {
+  if (dest==NULL || src==NULL || dest_size == 0) return;
+  // copy until end of src, or when dest is (almost) full
+  while (*src != 0 && dest_size > 1) {
+    *dest++ = *src++;
+    dest_size--;
+  }
+  // always zero terminate
+  *dest = 0;
+}
+
+void _mi_strlcat(char* dest, const char* src, size_t dest_size) {
+  if (dest==NULL || src==NULL || dest_size == 0) return;
+  // find end of string in the dest buffer
+  while (*dest != 0 && dest_size > 1) {
+    dest++;
+    dest_size--;
+  }
+  // and catenate
+  _mi_strlcpy(dest, src, dest_size);
+}
+
+size_t _mi_strlen(const char* s) {
+  if (s==NULL) return 0;
+  size_t len = 0;
+  while(s[len] != 0) { len++; }
+  return len;
+}
+
+size_t _mi_strnlen(const char* s, size_t max_len) {
+  if (s==NULL) return 0;
+  size_t len = 0;
+  while(s[len] != 0 && len < max_len) { len++; }
+  return len;
+}
+
+char* _mi_strnstr(char* s, size_t max_len, const char* pat) {
+  if (s==NULL) return NULL;
+  if (pat==NULL) return s;
+  const size_t m = _mi_strnlen(s, max_len);
+  const size_t n = _mi_strlen(pat);
+  for (size_t start = 0; start + n <= m; start++) {
+    size_t i = 0;
+    while (i<n && s[start+i]==pat[i]) { i++; }
+    if (i==n) return &s[start];
+  }
+  return NULL;
+}
+
+// --------------------------------------------------------
+// Define our own limited `_mi_vsnprintf` and `_mi_snprintf`
+// This is mostly to avoid calling these when libc is not yet
+// initialized (and to reduce dependencies)
+// --------------------------------------------------------
+
+static void mi_outc(char c, char** out, char* end) {
+  char* p = *out;
+  if (p >= end) return;
+  *p = c;
+  *out = p + 1;
+}
+
+static void mi_outs(const char* s, char** out, char* end) {
+  if (s == NULL) return;
+  char* p = *out;
+  while (*s != 0 && p < end) {
+    *p++ = *s++;
+  }
+  *out = p;
+}
+
+static void mi_out_fill(char fill, size_t len, char** out, char* end) {
+  char* p = *out;
+  for (size_t i = 0; i < len && p < end; i++) {
+    *p++ = fill;
+  }
+  *out = p;
+}
+
+static void mi_out_alignright(char fill, char* start, size_t len, size_t extra, char* end) {
+  if (len == 0 || extra == 0) return;
+  if (start + len + extra >= end) return;
+  // move `len` characters to the right (in reverse since it can overlap)
+  for (size_t i = 1; i <= len; i++) {
+    start[len + extra - i] = start[len - i];
+  }
+  // and fill the start
+  for (size_t i = 0; i < extra; i++) {
+    start[i] = fill;
+  }
+}
+
+
+static void mi_out_num(uintmax_t x, size_t base, char prefix, char** out, char* end)
+{
+  if (x == 0 || base == 0 || base > 16) {
+    if (prefix != 0) { mi_outc(prefix, out, end); }
+    mi_outc('0',out,end);
+  }
+  else {
+    // output digits in reverse
+    char* start = *out;
+    while (x > 0) {
+      char digit = (char)(x % base);
+      mi_outc((digit <= 9 ?
'0' + digit : 'A' + digit - 10),out,end); + x = x / base; + } + if (prefix != 0) { + mi_outc(prefix, out, end); + } + size_t len = *out - start; + // and reverse in-place + for (size_t i = 0; i < (len / 2); i++) { + char c = start[len - i - 1]; + start[len - i - 1] = start[i]; + start[i] = c; + } + } +} + + +#define MI_NEXTC() c = *in; if (c==0) break; in++; + +int _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args) { + if (buf == NULL || bufsize == 0 || fmt == NULL) return 0; + buf[bufsize - 1] = 0; + char* const end = buf + (bufsize - 1); + const char* in = fmt; + char* out = buf; + while (true) { + if (out >= end) break; + char c; + MI_NEXTC(); + if (c != '%') { + if (c == '\\') { + MI_NEXTC(); + switch (c) { + case 'e': mi_outc('\x1B', &out, end); break; + case 't': mi_outc('\t', &out, end); break; + case 'n': mi_outc('\n', &out, end); break; + case 'r': mi_outc('\r', &out, end); break; + case '\\': mi_outc('\\', &out, end); break; + default: /* ignore */ break; + } + } + else if ((c >= ' ' && c <= '~') || c=='\n' || c=='\r' || c=='\t' || c=='\x1b') { // output visible ascii or standard control only + mi_outc(c, &out, end); + } + } + else { + MI_NEXTC(); + char fill = ' '; + size_t width = 0; + char numtype = 'd'; + char numplus = 0; + bool alignright = true; + if (c == '+' || c == ' ') { numplus = c; MI_NEXTC(); } + if (c == '-') { alignright = false; MI_NEXTC(); } + if (c == '0') { fill = '0'; MI_NEXTC(); } + if (c >= '1' && c <= '9') { + width = (c - '0'); MI_NEXTC(); + while (c >= '0' && c <= '9') { + width = (10 * width) + (c - '0'); MI_NEXTC(); + } + if (c == 0) break; // extra check due to while + } + if (c == 'z' || c == 't' || c == 'L') { numtype = c; MI_NEXTC(); } + else if (c == 'l') { + numtype = c; MI_NEXTC(); + if (c == 'l') { numtype = 'L'; MI_NEXTC(); } + } + + char* start = out; + if (c == '%') { + mi_outc('%', &out, end); + } + else if (c == 's') { + // string + const char* s = va_arg(args, const char*); + mi_outs(s, &out, end); + } + else if (c == 'p' || c == 'x' || c == 'u') { + // unsigned + uintmax_t x = 0; + if (c == 'x' || c == 'u') { + if (numtype == 'z') x = va_arg(args, size_t); + else if (numtype == 't') x = va_arg(args, uintptr_t); // unsigned ptrdiff_t + else if (numtype == 'L') x = va_arg(args, unsigned long long); + else if (numtype == 'l') x = va_arg(args, unsigned long); + else x = va_arg(args, unsigned int); + } + else if (c == 'p') { + x = va_arg(args, uintptr_t); + mi_outs("0x", &out, end); + start = out; + width = (width >= 2 ? width - 2 : 0); + } + if (width == 0 && (c == 'x' || c == 'p')) { + if (c == 'p') { width = 2 * (x <= UINT32_MAX ? 4 : ((x >> 16) <= UINT32_MAX ? 6 : sizeof(void*))); } + if (width == 0) { width = 2; } + fill = '0'; + } + mi_out_num(x, (c == 'x' || c == 'p' ? 
16 : 10), numplus, &out, end); + } + else if (c == 'i' || c == 'd') { + // signed + intmax_t x = 0; + if (numtype == 'z') x = va_arg(args, intptr_t ); + else if (numtype == 't') x = va_arg(args, ptrdiff_t); + else if (numtype == 'L') x = va_arg(args, long long); + else if (numtype == 'l') x = va_arg(args, long); + else x = va_arg(args, int); + char pre = 0; + if (x < 0) { + pre = '-'; + if (x > INTMAX_MIN) { x = -x; } + } + else if (numplus != 0) { + pre = numplus; + } + mi_out_num((uintmax_t)x, 10, pre, &out, end); + } + else if (c >= ' ' && c <= '~') { + // unknown format + mi_outc('%', &out, end); + mi_outc(c, &out, end); + } + + // fill & align + mi_assert_internal(out <= end); + mi_assert_internal(out >= start); + const size_t len = out - start; + if (len < width) { + mi_out_fill(fill, width - len, &out, end); + if (alignright && out <= end) { + mi_out_alignright(fill, start, len, width - len, end); + } + } + } + } + mi_assert_internal(out <= end); + *out = 0; + return (int)(out - buf); +} + +int _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) { + va_list args; + va_start(args, fmt); + const int written = _mi_vsnprintf(buf, buflen, fmt, args); + va_end(args); + return written; +} + + + +// -------------------------------------------------------- +// generic trailing and leading zero count, and popcount +// -------------------------------------------------------- + +#if !MI_HAS_FAST_BITSCAN + +static size_t mi_ctz_generic32(uint32_t x) { + // de Bruijn multiplication, see + static const uint8_t debruijn[32] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + if (x==0) return 32; + return debruijn[(uint32_t)((x & -(int32_t)x) * (uint32_t)(0x077CB531U)) >> 27]; +} + +static size_t mi_clz_generic32(uint32_t x) { + // de Bruijn multiplication, see + static const uint8_t debruijn[32] = { + 31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1, + 23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0 + }; + if (x==0) return 32; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + return debruijn[(uint32_t)(x * (uint32_t)(0x07C4ACDDU)) >> 27]; +} + +size_t _mi_ctz_generic(size_t x) { + if (x==0) return MI_SIZE_BITS; + #if (MI_SIZE_BITS <= 32) + return mi_ctz_generic32((uint32_t)x); + #else + const uint32_t lo = (uint32_t)x; + if (lo != 0) { + return mi_ctz_generic32(lo); + } + else { + return (32 + mi_ctz_generic32((uint32_t)(x>>32))); + } + #endif +} + +size_t _mi_clz_generic(size_t x) { + if (x==0) return MI_SIZE_BITS; + #if (MI_SIZE_BITS <= 32) + return mi_clz_generic32((uint32_t)x); + #else + const uint32_t hi = (uint32_t)(x>>32); + if (hi != 0) { + return mi_clz_generic32(hi); + } + else { + return 32 + mi_clz_generic32((uint32_t)x); + } + #endif +} + +#endif // bit scan + + +#if MI_SIZE_SIZE == 4 +#define mi_mask_even_bits32 (0x55555555) +#define mi_mask_even_pairs32 (0x33333333) +#define mi_mask_even_nibbles32 (0x0F0F0F0F) + +// sum of all the bytes in `x` if it is guaranteed that the sum < 256! +static size_t mi_byte_sum32(uint32_t x) { + // perform `x * 0x01010101`: the highest byte contains the sum of all bytes. 
x += (x << 8);
+  x += (x << 16);
+  return (size_t)(x >> 24);
+}
+
+static size_t mi_popcount_generic32(uint32_t x) {
+  // first count each 2-bit group `a`, where: a==0b00 -> 00, a==0b01 -> 01, a==0b10 -> 01, a==0b11 -> 10
+  // in other words, `a - (a>>1)`; to do this in parallel, we need to mask to prevent spilling a bit pair
+  // into the lower bit-pair:
+  x = x - ((x >> 1) & mi_mask_even_bits32);
+  // add the 2-bit pair results
+  x = (x & mi_mask_even_pairs32) + ((x >> 2) & mi_mask_even_pairs32);
+  // add the 4-bit nibble results
+  x = (x + (x >> 4)) & mi_mask_even_nibbles32;
+  // each byte now has a count of its bits, we can sum them now:
+  return mi_byte_sum32(x);
+}
+
+mi_decl_noinline size_t _mi_popcount_generic(size_t x) {
+  if (x<=1) return x;
+  if (~x==0) return MI_SIZE_BITS;
+  return mi_popcount_generic32(x);
+}
+
+#else
+#define mi_mask_even_bits64 (0x5555555555555555)
+#define mi_mask_even_pairs64 (0x3333333333333333)
+#define mi_mask_even_nibbles64 (0x0F0F0F0F0F0F0F0F)
+
+// sum of all the bytes in `x` if it is guaranteed that the sum < 256!
+static size_t mi_byte_sum64(uint64_t x) {
+  x += (x << 8);
+  x += (x << 16);
+  x += (x << 32);
+  return (size_t)(x >> 56);
+}
+
+static size_t mi_popcount_generic64(uint64_t x) {
+  x = x - ((x >> 1) & mi_mask_even_bits64);
+  x = (x & mi_mask_even_pairs64) + ((x >> 2) & mi_mask_even_pairs64);
+  x = (x + (x >> 4)) & mi_mask_even_nibbles64;
+  return mi_byte_sum64(x);
+}
+
+mi_decl_noinline size_t _mi_popcount_generic(size_t x) {
+  if (x<=1) return x;
+  if (~x==0) return MI_SIZE_BITS;
+  return mi_popcount_generic64(x);
+}
+#endif
+
diff --git a/3rd/mimalloc-3.2.6/src/options.c b/3rd/mimalloc-3.2.6/src/options.c
new file mode 100644
index 00000000..27465a1c
--- /dev/null
+++ b/3rd/mimalloc-3.2.6/src/options.c
@@ -0,0 +1,694 @@
+/* ----------------------------------------------------------------------------
+Copyright (c) 2018-2025, Microsoft Research, Daan Leijen
+This is free software; you can redistribute it and/or modify it under the
+terms of the MIT license. A copy of the license can be found in the file
+"LICENSE" at the root of this distribution.
+-----------------------------------------------------------------------------*/
+#include "mimalloc.h"
+#include "mimalloc/internal.h"
+#include "mimalloc/atomic.h"
+#include "mimalloc/prim.h" // mi_prim_out_stderr
+
+#include <stdio.h> // stdin/stdout
+#include <stdlib.h> // abort
+
+static long mi_max_error_count = 16; // stop outputting errors after this (use < 0 for no limit)
+static long mi_max_warning_count = 16; // stop outputting warnings after this (use < 0 for no limit)
+
+static void mi_add_stderr_output(void);
+
+int mi_version(void) mi_attr_noexcept {
+  return MI_MALLOC_VERSION;
+}
+
+
+// --------------------------------------------------------
+// Options
+// These can be accessed by multiple threads and may be
+// concurrently initialized, but an initializing data race
+// is ok since they resolve to the same value.
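The generic popcount at the end of libc.c above is a standard SWAR reduction followed by the multiply-free byte sum. A standalone check of the same steps against a naive bit loop (the helper names here are mine, not mimalloc's):

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static unsigned naive_popcount(uint32_t x) {
  unsigned n = 0;
  while (x != 0) { n += (x & 1); x >>= 1; }
  return n;
}

static unsigned swar_popcount(uint32_t x) {
  x = x - ((x >> 1) & 0x55555555u);                  // per 2-bit group: count of set bits
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);  // per 4-bit group
  x = (x + (x >> 4)) & 0x0F0F0F0Fu;                  // per byte (each count <= 8)
  // multiply-free byte sum (the cascaded shifts equal x * 0x01010101):
  x += (x << 8);
  x += (x << 16);
  return (unsigned)(x >> 24);                        // top byte holds the total
}

int main(void) {
  const uint32_t samples[] = { 0u, 1u, 0x80000000u, 0xF0F0F0F0u, 0xFFFFFFFFu, 0xDEADBEEFu };
  for (size_t i = 0; i < sizeof(samples)/sizeof(samples[0]); i++) {
    assert(swar_popcount(samples[i]) == naive_popcount(samples[i]));
  }
  printf("popcount(0xDEADBEEF) = %u\n", swar_popcount(0xDEADBEEFu));
  return 0;
}
```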
+// -------------------------------------------------------- + + +#define MI_OPTION(opt) mi_option_##opt, #opt, NULL +#define MI_OPTION_LEGACY(opt,legacy) mi_option_##opt, #opt, #legacy + +// Some options can be set at build time for statically linked libraries +// (use `-DMI_EXTRA_CPPDEFS="opt1=val1;opt2=val2"`) +// +// This is useful if we cannot pass them as environment variables +// (and setting them programmatically would be too late) + +#ifndef MI_DEFAULT_VERBOSE +#define MI_DEFAULT_VERBOSE 0 +#endif + +#ifndef MI_DEFAULT_ARENA_EAGER_COMMIT +#define MI_DEFAULT_ARENA_EAGER_COMMIT 2 +#endif + +// in KiB +#ifndef MI_DEFAULT_ARENA_RESERVE + #if (MI_INTPTR_SIZE>4) + #define MI_DEFAULT_ARENA_RESERVE 1024L*1024L + #else + #define MI_DEFAULT_ARENA_RESERVE 128L*1024L + #endif +#endif + +#ifndef MI_DEFAULT_DISALLOW_ARENA_ALLOC +#define MI_DEFAULT_DISALLOW_ARENA_ALLOC 0 +#endif + +#ifndef MI_DEFAULT_ALLOW_LARGE_OS_PAGES +#define MI_DEFAULT_ALLOW_LARGE_OS_PAGES 0 +#endif + +#ifndef MI_DEFAULT_RESERVE_HUGE_OS_PAGES +#define MI_DEFAULT_RESERVE_HUGE_OS_PAGES 0 +#endif + +#ifndef MI_DEFAULT_RESERVE_OS_MEMORY +#define MI_DEFAULT_RESERVE_OS_MEMORY 0 +#endif + +#ifndef MI_DEFAULT_GUARDED_SAMPLE_RATE +#if MI_GUARDED +#define MI_DEFAULT_GUARDED_SAMPLE_RATE 4000 +#else +#define MI_DEFAULT_GUARDED_SAMPLE_RATE 0 +#endif +#endif + +#ifndef MI_DEFAULT_PAGEMAP_COMMIT +#if defined(__APPLE__) // when overloading malloc, we still get mixed pointers sometimes on macOS; this avoids a bad access +#define MI_DEFAULT_PAGEMAP_COMMIT 1 +#else +#define MI_DEFAULT_PAGEMAP_COMMIT 0 +#endif +#endif + +#ifndef MI_DEFAULT_PAGE_MAX_RECLAIM +#define MI_DEFAULT_PAGE_MAX_RECLAIM (-1) // unlimited +#endif + +#ifndef MI_DEFAULT_PAGE_CROSS_THREAD_MAX_RECLAIM +#define MI_DEFAULT_PAGE_CROSS_THREAD_MAX_RECLAIM 32 +#endif + +#ifndef MI_DEFAULT_ALLOW_THP +#if defined(__ANDROID__) +#define MI_DEFAULT_ALLOW_THP 0 +#else +#define MI_DEFAULT_ALLOW_THP 1 +#endif +#endif + +// Static options +static mi_option_desc_t mi_options[_mi_option_last] = +{ + // stable options +#if MI_DEBUG || defined(MI_SHOW_ERRORS) + { 1, MI_OPTION_UNINIT, MI_OPTION(show_errors) }, +#else + { 0, MI_OPTION_UNINIT, MI_OPTION(show_errors) }, +#endif + { 0, MI_OPTION_UNINIT, MI_OPTION(show_stats) }, + { MI_DEFAULT_VERBOSE, MI_OPTION_UNINIT, MI_OPTION(verbose) }, + + // some of the following options are experimental and not all combinations are allowed. + { 1, MI_OPTION_UNINIT, MI_OPTION(deprecated_eager_commit) }, + { MI_DEFAULT_ARENA_EAGER_COMMIT, + MI_OPTION_UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) }, // eager commit arena's? 2 is used to enable this only on an OS that has overcommit (i.e. 
linux) + { 1, MI_OPTION_UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, // purge decommits memory (instead of reset) (note: on linux this uses MADV_DONTNEED for decommit) + { MI_DEFAULT_ALLOW_LARGE_OS_PAGES, + MI_OPTION_UNINIT, MI_OPTION_LEGACY(allow_large_os_pages,large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's + { MI_DEFAULT_RESERVE_HUGE_OS_PAGES, + MI_OPTION_UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages + {-1, MI_OPTION_UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N + { MI_DEFAULT_RESERVE_OS_MEMORY, + MI_OPTION_UNINIT, MI_OPTION(reserve_os_memory) }, // reserve N KiB OS memory in advance (use `option_get_size`) + { 0, MI_OPTION_UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread + { 0, MI_OPTION_UNINIT, MI_OPTION(deprecated_page_reset) }, // reset page memory on free + { 0, MI_OPTION_UNINIT, MI_OPTION(deprecated_abandoned_page_purge) }, + { 0, MI_OPTION_UNINIT, MI_OPTION(deprecated_segment_reset) }, // reset segment memory on free (needs eager commit) +#if defined(__NetBSD__) + { 0, MI_OPTION_UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed +#else + { 1, MI_OPTION_UNINIT, MI_OPTION(deprecated_eager_commit_delay) }, +#endif + { 1000,MI_OPTION_UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) }, // purge delay in milli-seconds + { 0, MI_OPTION_UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. + { 0, MI_OPTION_UNINIT, MI_OPTION_LEGACY(disallow_os_alloc,limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) + { 100, MI_OPTION_UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose + { 32, MI_OPTION_UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output + { 32, MI_OPTION_UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output + { 10, MI_OPTION_UNINIT, MI_OPTION(deprecated_max_segment_reclaim)}, // max. percentage of the abandoned segments to be reclaimed per try. + { 0, MI_OPTION_UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees! + { MI_DEFAULT_ARENA_RESERVE, MI_OPTION_UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time (=1GiB) (use `option_get_size`) + { 1, MI_OPTION_UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's + { 1, MI_OPTION_UNINIT, MI_OPTION_LEGACY(deprecated_purge_extend_delay, decommit_extend_delay) }, + { MI_DEFAULT_DISALLOW_ARENA_ALLOC, MI_OPTION_UNINIT, MI_OPTION(disallow_arena_alloc) }, // 1 = do not use arena's for allocation (except if using specific arena id's) + { 400, MI_OPTION_UNINIT, MI_OPTION(retry_on_oom) }, // windows only: retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries. +#if defined(MI_VISIT_ABANDONED) + { 1, MI_OPTION_INITIALIZED, MI_OPTION(visit_abandoned) }, // allow visiting theap blocks in abandoned segments; requires taking locks during reclaim. 
+#else + { 0, MI_OPTION_UNINIT, MI_OPTION(visit_abandoned) }, +#endif + { 0, MI_OPTION_UNINIT, MI_OPTION(guarded_min) }, // only used when building with MI_GUARDED: minimal rounded object size for guarded objects + { MI_GiB, MI_OPTION_UNINIT, MI_OPTION(guarded_max) }, // only used when building with MI_GUARDED: maximal rounded object size for guarded objects + { 0, MI_OPTION_UNINIT, MI_OPTION(guarded_precise) }, // disregard minimal alignment requirement to always place guarded blocks exactly in front of a guard page (=0) + { MI_DEFAULT_GUARDED_SAMPLE_RATE, + MI_OPTION_UNINIT, MI_OPTION(guarded_sample_rate)}, // 1 out of N allocations in the min/max range will be guarded (=4000) + { 0, MI_OPTION_UNINIT, MI_OPTION(guarded_sample_seed)}, + { 10000, MI_OPTION_UNINIT, MI_OPTION(generic_collect) }, // collect theaps every N (=10000) generic allocation calls + { 0, MI_OPTION_UNINIT, MI_OPTION_LEGACY(page_reclaim_on_free, abandoned_reclaim_on_free) },// reclaim abandoned (small) pages on a free: -1 = disable completely, 0 = only reclaim into the originating theap, 1 = reclaim on free across theaps + { 2, MI_OPTION_UNINIT, MI_OPTION(page_full_retain) }, // number of (small) pages to retain in the free page queues + { 4, MI_OPTION_UNINIT, MI_OPTION(page_max_candidates) }, // max search to find a best page candidate + { 0, MI_OPTION_UNINIT, MI_OPTION(max_vabits) }, // max virtual address space bits + { MI_DEFAULT_PAGEMAP_COMMIT, + MI_OPTION_UNINIT, MI_OPTION(pagemap_commit) }, // commit the full pagemap upfront? + { 0, MI_OPTION_UNINIT, MI_OPTION(page_commit_on_demand) }, // commit pages on-demand (2 disables this only on overcommit systems (like Linux)) + { MI_DEFAULT_PAGE_MAX_RECLAIM, + MI_OPTION_UNINIT, MI_OPTION(page_max_reclaim) }, // don't reclaim (small) pages of the same originating theap if we already own N pages in that size class + { MI_DEFAULT_PAGE_CROSS_THREAD_MAX_RECLAIM, + MI_OPTION_UNINIT, MI_OPTION(page_cross_thread_max_reclaim) }, // don't reclaim (small) pages across threads if we already own N pages in that size class + { MI_DEFAULT_ALLOW_THP, + MI_OPTION_UNINIT, MI_OPTION(allow_thp) }, // allow transparent huge pages? (=1) (on Android =0 by default). Set to 0 to disable THP for the process. + { 0, MI_OPTION_UNINIT, MI_OPTION(minimal_purge_size) }, // set minimal purge size (in KiB) (=0). By default set to either 64 or 2048 if THP is enabled. 
+}; + +static void mi_option_init(mi_option_desc_t* desc); + +static bool mi_option_has_size_in_kib(mi_option_t option) { + return (option == mi_option_reserve_os_memory || option == mi_option_arena_reserve || option == mi_option_minimal_purge_size ); +} + +void _mi_options_init(void) { + // called on process load + for(int i = 0; i < _mi_option_last; i++ ) { + mi_option_t option = (mi_option_t)i; + long l = mi_option_get(option); MI_UNUSED(l); // initialize + } + mi_max_error_count = mi_option_get(mi_option_max_errors); + mi_max_warning_count = mi_option_get(mi_option_max_warnings); + #if MI_GUARDED + if (mi_option_get(mi_option_guarded_sample_rate) > 0) { + if (mi_option_is_enabled(mi_option_allow_large_os_pages)) { + mi_option_disable(mi_option_allow_large_os_pages); + _mi_warning_message("option 'allow_large_os_pages' is disabled to allow for guarded objects\n"); + } + } + #endif +} + +// called at actual process load, it should be safe to print now +void _mi_options_post_init(void) { + mi_add_stderr_output(); // now it safe to use stderr for output + if (mi_option_is_enabled(mi_option_verbose)) { mi_options_print(); } +} + +#define mi_stringifyx(str) #str // and stringify +#define mi_stringify(str) mi_stringifyx(str) // expand + +mi_decl_export void mi_options_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept +{ + // show version + const int vermajor = MI_MALLOC_VERSION/1000; + const int verminor = (MI_MALLOC_VERSION%1000)/100; + const int verpatch = (MI_MALLOC_VERSION%100); + _mi_fprintf(out, arg, "v%i.%i.%i%s%s (built on %s, %s)\n", vermajor, verminor, verpatch, + #if defined(MI_CMAKE_BUILD_TYPE) + ", " mi_stringify(MI_CMAKE_BUILD_TYPE) + #else + "" + #endif + , + #if defined(MI_GIT_DESCRIBE) + ", git " mi_stringify(MI_GIT_DESCRIBE) + #else + "" + #endif + , __DATE__, __TIME__); + + // show options + for (int i = 0; i < _mi_option_last; i++) { + mi_option_t option = (mi_option_t)i; + long l = mi_option_get(option); MI_UNUSED(l); // possibly initialize + mi_option_desc_t* desc = &mi_options[option]; + _mi_fprintf(out, arg, "option '%s': %ld %s\n", desc->name, desc->value, (mi_option_has_size_in_kib(option) ? "KiB" : "")); + } + + // show build configuration + _mi_fprintf(out, arg, "debug level : %d\n", MI_DEBUG ); + _mi_fprintf(out, arg, "secure level: %d\n", MI_SECURE ); + _mi_fprintf(out, arg, "mem tracking: %s\n", MI_TRACK_TOOL); + #if MI_GUARDED + _mi_fprintf(out, arg, "guarded build: %s\n", mi_option_get(mi_option_guarded_sample_rate) != 0 ? 
"enabled" : "disabled"); + #endif + #if MI_TSAN + _mi_fprintf(out, arg, "thread santizer enabled\n"); + #endif +} + +mi_decl_export void mi_options_print(void) mi_attr_noexcept { + mi_options_print_out(NULL, NULL); +} + +long _mi_option_get_fast(mi_option_t option) { + mi_assert(option >= 0 && option < _mi_option_last); + mi_option_desc_t* desc = &mi_options[option]; + mi_assert(desc->option == option); // index should match the option + //mi_assert(desc->init != MI_OPTION_UNINIT); + return desc->value; +} + + +mi_decl_nodiscard long mi_option_get(mi_option_t option) { + mi_assert(option >= 0 && option < _mi_option_last); + if (option < 0 || option >= _mi_option_last) return 0; + mi_option_desc_t* desc = &mi_options[option]; + mi_assert(desc->option == option); // index should match the option + if mi_unlikely(desc->init == MI_OPTION_UNINIT) { + mi_option_init(desc); + } + return desc->value; +} + +mi_decl_nodiscard long mi_option_get_clamp(mi_option_t option, long min, long max) { + long x = mi_option_get(option); + return (x < min ? min : (x > max ? max : x)); +} + +mi_decl_nodiscard size_t mi_option_get_size(mi_option_t option) { + const long x = mi_option_get(option); + size_t size = (x < 0 ? 0 : (size_t)x); + if (mi_option_has_size_in_kib(option)) { + size *= MI_KiB; + } + return size; +} + +void mi_option_set(mi_option_t option, long value) { + mi_assert(option >= 0 && option < _mi_option_last); + if (option < 0 || option >= _mi_option_last) return; + mi_option_desc_t* desc = &mi_options[option]; + mi_assert(desc->option == option); // index should match the option + desc->value = value; + desc->init = MI_OPTION_INITIALIZED; + // ensure min/max range; be careful to not recurse. + if (desc->option == mi_option_guarded_min && _mi_option_get_fast(mi_option_guarded_max) < value) { + mi_option_set(mi_option_guarded_max, value); + } + else if (desc->option == mi_option_guarded_max && _mi_option_get_fast(mi_option_guarded_min) > value) { + mi_option_set(mi_option_guarded_min, value); + } +} + +void mi_option_set_default(mi_option_t option, long value) { + mi_assert(option >= 0 && option < _mi_option_last); + if (option < 0 || option >= _mi_option_last) return; + mi_option_desc_t* desc = &mi_options[option]; + if (desc->init != MI_OPTION_INITIALIZED) { + desc->value = value; + } +} + +mi_decl_nodiscard bool mi_option_is_enabled(mi_option_t option) { + return (mi_option_get(option) != 0); +} + +void mi_option_set_enabled(mi_option_t option, bool enable) { + mi_option_set(option, (enable ? 1 : 0)); +} + +void mi_option_set_enabled_default(mi_option_t option, bool enable) { + mi_option_set_default(option, (enable ? 1 : 0)); +} + +void mi_option_enable(mi_option_t option) { + mi_option_set_enabled(option,true); +} + +void mi_option_disable(mi_option_t option) { + mi_option_set_enabled(option,false); +} + +static void mi_cdecl mi_out_stderr(const char* msg, void* arg) { + MI_UNUSED(arg); + if (msg != NULL && msg[0] != 0) { + _mi_prim_out_stderr(msg); + } +} + +// Since an output function can be registered earliest in the `main` +// function we also buffer output that happens earlier. When +// an output function is registered it is called immediately with +// the output up to that point. 
+#ifndef MI_MAX_DELAY_OUTPUT +#define MI_MAX_DELAY_OUTPUT ((size_t)(16*1024)) +#endif +static char mi_output_buffer[MI_MAX_DELAY_OUTPUT+1]; +static _Atomic(size_t) out_len; + +static void mi_cdecl mi_out_buf(const char* msg, void* arg) { + MI_UNUSED(arg); + if (msg==NULL) return; + if (mi_atomic_load_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; + size_t n = _mi_strlen(msg); + if (n==0) return; + // claim space + size_t start = mi_atomic_add_acq_rel(&out_len, n); + if (start >= MI_MAX_DELAY_OUTPUT) return; + // check bound + if (start+n >= MI_MAX_DELAY_OUTPUT) { + n = MI_MAX_DELAY_OUTPUT-start-1; + } + mi_assert_internal(start + n <= MI_MAX_DELAY_OUTPUT); + _mi_memcpy(&mi_output_buffer[start], msg, n); +} + +static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf, void* arg) { + if (out==NULL) return; + // claim (if `no_more_buf == true`, no more output will be added after this point) + size_t count = mi_atomic_add_acq_rel(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1)); + // and output the current contents + if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT; + mi_output_buffer[count] = 0; + out(mi_output_buffer,arg); + if (!no_more_buf) { + mi_output_buffer[count] = '\n'; // if continue with the buffer, insert a newline + } +} + + +// Once this module is loaded, switch to this routine +// which outputs to stderr and the delayed output buffer. +static void mi_cdecl mi_out_buf_stderr(const char* msg, void* arg) { + mi_out_stderr(msg,arg); + mi_out_buf(msg,arg); +} + + + +// -------------------------------------------------------- +// Default output handler +// -------------------------------------------------------- + +// Should be atomic but gives errors on many platforms as generally we cannot cast a function pointer to a uintptr_t. +// For now, don't register output from multiple threads. +static mi_output_fun* volatile mi_out_default; // = NULL +static _Atomic(void*) mi_out_arg; // = NULL + +static mi_output_fun* mi_out_get_default(void** parg) { + if (parg != NULL) { *parg = mi_atomic_load_ptr_acquire(void,&mi_out_arg); } + mi_output_fun* out = mi_out_default; + return (out == NULL ? &mi_out_buf : out); +} + +void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept { + mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer + mi_atomic_store_ptr_release(void,&mi_out_arg, arg); + if (out!=NULL) mi_out_buf_flush(out,true,arg); // output all the delayed output now +} + +// add stderr to the delayed output after the module is loaded +static void mi_add_stderr_output(void) { + mi_assert_internal(mi_out_default == NULL); + if (mi_out_default==NULL) { + mi_out_buf_flush(&mi_out_stderr, false, NULL); // flush current contents to stderr + mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output + } +} + +// -------------------------------------------------------- +// Messages, all end up calling `_mi_fputs`. +// -------------------------------------------------------- +static _Atomic(size_t) error_count; // = 0; // when >= max_error_count stop emitting errors +static _Atomic(size_t) warning_count; // = 0; // when >= max_warning_count stop emitting warnings + +// When overriding malloc, we may recurse into mi_vfprintf if an allocation +// inside the C runtime causes another message. 
+// In some cases (like on macOS) the loader already allocates which +// calls into mimalloc; if we then access thread locals (like `recurse`) +// this may crash as the access may call _tlv_bootstrap that tries to +// (recursively) invoke malloc again to allocate space for the thread local +// variables on demand. This is why we use a _mi_preloading test on such +// platforms. However, C code generator may move the initial thread local address +// load before the `if` and we therefore split it out in a separate function. +static mi_decl_thread bool recurse = false; + +static mi_decl_noinline bool mi_recurse_enter_prim(void) { + if (recurse) return false; + recurse = true; + return true; +} + +static mi_decl_noinline void mi_recurse_exit_prim(void) { + recurse = false; +} + +static bool mi_recurse_enter(void) { + #if defined(__APPLE__) || defined(__ANDROID__) || defined(MI_TLS_RECURSE_GUARD) + if (_mi_preloading()) return false; + #endif + return mi_recurse_enter_prim(); +} + +static void mi_recurse_exit(void) { + #if defined(__APPLE__) || defined(__ANDROID__) || defined(MI_TLS_RECURSE_GUARD) + if (_mi_preloading()) return; + #endif + mi_recurse_exit_prim(); +} + +void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message) { + if (out==NULL || (void*)out==(void*)stdout || (void*)out==(void*)stderr) { // TODO: use mi_out_stderr for stderr? + if (!mi_recurse_enter()) return; + out = mi_out_get_default(&arg); + if (prefix != NULL) out(prefix, arg); + out(message, arg); + mi_recurse_exit(); + } + else { + if (prefix != NULL) out(prefix, arg); + out(message, arg); + } +} + +// Define our own limited `fprintf` that avoids memory allocation. +// We do this using `_mi_vsnprintf` with a limited buffer. +static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) { + char buf[992]; + if (fmt==NULL) return; + if (!mi_recurse_enter()) return; + _mi_vsnprintf(buf, sizeof(buf)-1, fmt, args); + mi_recurse_exit(); + _mi_fputs(out,arg,prefix,buf); +} + +void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) { + va_list args; + va_start(args,fmt); + mi_vfprintf(out,arg,NULL,fmt,args); + va_end(args); +} + +static void mi_vfprintf_thread(mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args) { + if (prefix != NULL && _mi_strnlen(prefix,33) <= 32 && !_mi_is_main_thread()) { + char tprefix[64]; + _mi_snprintf(tprefix, sizeof(tprefix), "%sthread 0x%tx: ", prefix, (uintptr_t)_mi_thread_id()); + mi_vfprintf(out, arg, tprefix, fmt, args); + } + else { + mi_vfprintf(out, arg, prefix, fmt, args); + } +} + +void _mi_raw_message(const char* fmt, ...) { + va_list args; + va_start(args, fmt); + mi_vfprintf(NULL, NULL, NULL, fmt, args); + va_end(args); +} + +void _mi_message(const char* fmt, ...) { + va_list args; + va_start(args, fmt); + mi_vfprintf_thread(NULL, NULL, "mimalloc: ", fmt, args); + va_end(args); +} + +void _mi_trace_message(const char* fmt, ...) { + if (mi_option_get(mi_option_verbose) <= 1) return; // only with verbose level 2 or higher + va_list args; + va_start(args, fmt); + mi_vfprintf_thread(NULL, NULL, "mimalloc: ", fmt, args); + va_end(args); +} + +void _mi_verbose_message(const char* fmt, ...) 
{ + if (!mi_option_is_enabled(mi_option_verbose)) return; + va_list args; + va_start(args,fmt); + mi_vfprintf(NULL, NULL, "mimalloc: ", fmt, args); + va_end(args); +} + +static void mi_show_error_message(const char* fmt, va_list args) { + if (!mi_option_is_enabled(mi_option_verbose)) { + if (!mi_option_is_enabled(mi_option_show_errors)) return; + if (mi_max_error_count >= 0 && (long)mi_atomic_increment_acq_rel(&error_count) > mi_max_error_count) return; + } + mi_vfprintf_thread(NULL, NULL, "mimalloc: error: ", fmt, args); +} + +void _mi_warning_message(const char* fmt, ...) { + if (!mi_option_is_enabled(mi_option_verbose)) { + if (!mi_option_is_enabled(mi_option_show_errors)) return; + if (mi_max_warning_count >= 0 && (long)mi_atomic_increment_acq_rel(&warning_count) > mi_max_warning_count) return; + } + va_list args; + va_start(args,fmt); + mi_vfprintf_thread(NULL, NULL, "mimalloc: warning: ", fmt, args); + va_end(args); +} + + +#if MI_DEBUG +mi_decl_noreturn mi_decl_cold void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) mi_attr_noexcept { + _mi_fprintf(NULL, NULL, "mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); + abort(); +} +#endif + +// -------------------------------------------------------- +// Errors +// -------------------------------------------------------- + +static mi_error_fun* volatile mi_error_handler; // = NULL +static _Atomic(void*) mi_error_arg; // = NULL + +static void mi_error_default(int err) { + MI_UNUSED(err); +#if (MI_DEBUG>0) + if (err==EFAULT) { + #ifdef _MSC_VER + __debugbreak(); + #endif + abort(); + } +#endif +#if (MI_SECURE>0) + if (err==EFAULT) { // abort on serious errors in secure mode (corrupted meta-data) + abort(); + } +#endif +#if defined(MI_XMALLOC) + if (err==ENOMEM || err==EOVERFLOW) { // abort on memory allocation fails in xmalloc mode + abort(); + } +#endif +} + +void mi_register_error(mi_error_fun* fun, void* arg) { + mi_error_handler = fun; // can be NULL + mi_atomic_store_ptr_release(void,&mi_error_arg, arg); +} + +void _mi_error_message(int err, const char* fmt, ...) 
{ + // show detailed error message + va_list args; + va_start(args, fmt); + mi_show_error_message(fmt, args); + va_end(args); + // and call the error handler which may abort (or return normally) + if (mi_error_handler != NULL) { + mi_error_handler(err, mi_atomic_load_ptr_acquire(void,&mi_error_arg)); + } + else { + mi_error_default(err); + } +} + +// -------------------------------------------------------- +// Initialize options by checking the environment +// -------------------------------------------------------- + +// TODO: implement ourselves to reduce dependencies on the C runtime +#include // strtol +#include // strstr + + +static void mi_option_init(mi_option_desc_t* desc) { + // Read option value from the environment + char s[64 + 1]; + char buf[64+1]; + _mi_strlcpy(buf, "mimalloc_", sizeof(buf)); + _mi_strlcat(buf, desc->name, sizeof(buf)); + bool found = _mi_getenv(buf, s, sizeof(s)); + if (!found && desc->legacy_name != NULL) { + _mi_strlcpy(buf, "mimalloc_", sizeof(buf)); + _mi_strlcat(buf, desc->legacy_name, sizeof(buf)); + found = _mi_getenv(buf, s, sizeof(s)); + if (found) { + _mi_warning_message("environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n", desc->legacy_name, desc->name); + } + } + + if (found) { + size_t len = _mi_strnlen(s, sizeof(buf) - 1); + for (size_t i = 0; i < len; i++) { + buf[i] = _mi_toupper(s[i]); + } + buf[len] = 0; + if (buf[0] == 0 || strstr("1;TRUE;YES;ON", buf) != NULL) { + desc->value = 1; + desc->init = MI_OPTION_INITIALIZED; + } + else if (strstr("0;FALSE;NO;OFF", buf) != NULL) { + desc->value = 0; + desc->init = MI_OPTION_INITIALIZED; + } + else { + char* end = buf; + long value = strtol(buf, &end, 10); + if (mi_option_has_size_in_kib(desc->option)) { + // this option is interpreted in KiB to prevent overflow of `long` for large allocations + // (long is 32-bit on 64-bit windows, which allows for 4TiB max.) + size_t size = (value < 0 ? 0 : (size_t)value); + bool overflow = false; + if (*end == 'K') { end++; } + else if (*end == 'M') { overflow = mi_mul_overflow(size,MI_KiB,&size); end++; } + else if (*end == 'G') { overflow = mi_mul_overflow(size,MI_MiB,&size); end++; } + else if (*end == 'T') { overflow = mi_mul_overflow(size,MI_GiB,&size); end++; } + else { size = (size + MI_KiB - 1) / MI_KiB; } + if (end[0] == 'I' && end[1] == 'B') { end += 2; } // KiB, MiB, GiB, TiB + else if (*end == 'B') { end++; } // Kb, Mb, Gb, Tb + if (overflow || size > MI_MAX_ALLOC_SIZE) { size = (MI_MAX_ALLOC_SIZE / MI_KiB); } + value = (size > LONG_MAX ? LONG_MAX : (long)size); + } + if (*end == 0) { + mi_option_set(desc->option, value); + } + else { + // set `init` first to avoid recursion through _mi_warning_message on mimalloc_verbose. 
+ desc->init = MI_OPTION_DEFAULTED; + if (desc->option == mi_option_verbose && desc->value == 0) { + // if the 'mimalloc_verbose' env var has a bogus value we'd never know + // (since the value defaults to 'off') so in that case briefly enable verbose + desc->value = 1; + _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name); + desc->value = 0; + } + else { + _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name); + } + } + } + mi_assert_internal(desc->init != MI_OPTION_UNINIT); + } + else if (!_mi_preloading()) { + desc->init = MI_OPTION_DEFAULTED; + } +} diff --git a/3rd/mimalloc-3.2.6/src/os.c b/3rd/mimalloc-3.2.6/src/os.c new file mode 100644 index 00000000..f2d9f699 --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/os.c @@ -0,0 +1,858 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" + +/* ----------------------------------------------------------- + Initialization. +----------------------------------------------------------- */ +#ifndef MI_DEFAULT_PHYSICAL_MEMORY_IN_KIB +#if MI_INTPTR_SIZE < 8 +#define MI_DEFAULT_PHYSICAL_MEMORY_IN_KIB 4*MI_MiB // 4 GiB +#else +#define MI_DEFAULT_PHYSICAL_MEMORY_IN_KIB 32*MI_MiB // 32 GiB +#endif +#endif + +static mi_os_mem_config_t mi_os_mem_config = { + 4096, // page size + 0, // large page size (usually 2MiB) + 4096, // allocation granularity + MI_DEFAULT_PHYSICAL_MEMORY_IN_KIB, + MI_MAX_VABITS, // in `bits.h` + true, // has overcommit? (if true we use MAP_NORESERVE on mmap systems) + false, // can we partially free allocated blocks? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span) + true, // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory) + false // has transparent huge pages? (if true we purge in (aligned) large page size chunks only to not fragment such pages) +}; + +bool _mi_os_has_overcommit(void) { + return mi_os_mem_config.has_overcommit; +} + +bool _mi_os_has_virtual_reserve(void) { + return mi_os_mem_config.has_virtual_reserve; +} + + +// OS (small) page size +size_t _mi_os_page_size(void) { + return mi_os_mem_config.page_size; +} + +// if large OS pages are supported (2 or 4MiB), then return the size, otherwise return the small page size (4KiB) +size_t _mi_os_large_page_size(void) { + return (mi_os_mem_config.large_page_size != 0 ? mi_os_mem_config.large_page_size : _mi_os_page_size()); +} + +// minimal purge size. Can be larger than the page size if transparent huge pages are enabled. 
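Looping back to the environment parsing in `mi_option_init` above: size-valued options accept a number with an optional K/M/G/T suffix and are stored internally in KiB, while a plain number is taken as bytes. The sketch below is a simplified, standalone illustration only; it ignores the trailing `iB`/`B` and the overflow clamping of the real parser, and the function name is invented.

```c
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>

// "16M" or "1GiB" becomes a KiB count; a plain number is taken as bytes and
// rounded up to whole KiB (overflow checks omitted in this sketch).
static unsigned long long size_env_to_kib(const char* s) {
  char* end = NULL;
  long v = strtol(s, &end, 10);
  unsigned long long kib = (v < 0 ? 0 : (unsigned long long)v);
  switch (toupper((unsigned char)*end)) {
    case 'K': break;
    case 'M': kib *= 1024ULL; break;
    case 'G': kib *= 1024ULL * 1024ULL; break;
    case 'T': kib *= 1024ULL * 1024ULL * 1024ULL; break;
    default : kib = (kib + 1023ULL) / 1024ULL; break;
  }
  return kib;
}

int main(void) {
  printf("MIMALLOC_RESERVE_OS_MEMORY=1GiB -> %llu KiB\n", size_env_to_kib("1GiB"));
  printf("MIMALLOC_RESERVE_OS_MEMORY=16M  -> %llu KiB\n", size_env_to_kib("16M"));
  printf("MIMALLOC_RESERVE_OS_MEMORY=4096 -> %llu KiB\n", size_env_to_kib("4096"));
  return 0;
}
```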
+size_t _mi_os_minimal_purge_size(void) { + size_t minsize = mi_option_get_size(mi_option_minimal_purge_size); + if (minsize != 0) { + return _mi_align_up(minsize, _mi_os_page_size()); + } + else if (mi_os_mem_config.has_transparent_huge_pages && mi_option_is_enabled(mi_option_allow_thp)) { + return _mi_os_large_page_size(); + } + else { + return _mi_os_page_size(); + } +} + +size_t _mi_os_guard_page_size(void) { + const size_t gsize = _mi_os_page_size(); + mi_assert(gsize <= (MI_ARENA_SLICE_SIZE/4)); // issue #1166 + return gsize; +} + +size_t _mi_os_virtual_address_bits(void) { + const size_t vbits = mi_os_mem_config.virtual_address_bits; + mi_assert(vbits <= MI_MAX_VABITS); + return vbits; +} + +bool _mi_os_canuse_large_page(size_t size, size_t alignment) { + // if we have access, check the size and alignment requirements + if (mi_os_mem_config.large_page_size == 0) return false; + return ((size % mi_os_mem_config.large_page_size) == 0 && (alignment % mi_os_mem_config.large_page_size) == 0); +} + +// round to a good OS allocation size (bounded by max 12.5% waste) +size_t _mi_os_good_alloc_size(size_t size) { + size_t align_size; + if (size < 512*MI_KiB) align_size = _mi_os_page_size(); + else if (size < 2*MI_MiB) align_size = 64*MI_KiB; + else if (size < 8*MI_MiB) align_size = 256*MI_KiB; + else if (size < 32*MI_MiB) align_size = 1*MI_MiB; + else align_size = 4*MI_MiB; + if mi_unlikely(size >= (SIZE_MAX - align_size)) return size; // possible overflow? + return _mi_align_up(size, align_size); +} + +void _mi_os_init(void) { + _mi_prim_mem_init(&mi_os_mem_config); +} + + +/* ----------------------------------------------------------- + Util +-------------------------------------------------------------- */ +bool _mi_os_decommit(void* addr, size_t size); +bool _mi_os_commit(void* addr, size_t size, bool* is_zero); + +void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) { + MI_UNUSED(try_alignment); MI_UNUSED(size); + return NULL; +} + + +/* ----------------------------------------------------------- + Guard page allocation +----------------------------------------------------------- */ + +// In secure mode, return the size of a guard page, otherwise 0 +size_t _mi_os_secure_guard_page_size(void) { + #if MI_SECURE > 0 + return _mi_os_guard_page_size(); + #else + return 0; + #endif +} + +// In secure mode, try to decommit an area and output a warning if this fails. +bool _mi_os_secure_guard_page_set_at(void* addr, mi_memid_t memid) { + if (addr == NULL) return true; + #if MI_SECURE > 0 + bool ok = false; + if (!memid.is_pinned) { + mi_arena_t* const arena = mi_memid_arena(memid); + if (arena != NULL && arena->commit_fun != NULL) { + ok = (*(arena->commit_fun))(false /* decommit */, addr, _mi_os_secure_guard_page_size(), NULL, arena->commit_fun_arg); + } + else { + ok = _mi_os_decommit(addr, _mi_os_secure_guard_page_size()); + } + } + if (!ok) { + _mi_error_message(EINVAL, "secure level %d, but failed to commit guard page (at %p of size %zu)\n", MI_SECURE, addr, _mi_os_secure_guard_page_size()); + } + return ok; + #else + MI_UNUSED(memid); + return true; + #endif +} + +// In secure mode, try to decommit an area and output a warning if this fails. 
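For reference on the bucketed rounding done by `_mi_os_good_alloc_size` above: the alignment grows with the request so the rounding waste stays bounded (at most about 12.5% of the size). The sketch below is illustrative only and assumes a 4 KiB OS page size, where the real function queries `_mi_os_page_size()`.

```c
#include <stdio.h>

// Same bucket boundaries as _mi_os_good_alloc_size, with a fixed 4 KiB page.
static size_t good_alloc_size(size_t size) {
  size_t align;
  if      (size < 512u * 1024)         align = 4 * 1024;
  else if (size <   2u * 1024 * 1024)  align = 64 * 1024;
  else if (size <   8u * 1024 * 1024)  align = 256 * 1024;
  else if (size <  32u * 1024 * 1024)  align = 1024 * 1024;
  else                                 align = 4 * 1024 * 1024;
  return (size + align - 1) / align * align;
}

int main(void) {
  const size_t sizes[] = { 100, 500u * 1024, 3u * 1024 * 1024, 40u * 1024 * 1024 };
  for (int i = 0; i < 4; i++) {
    printf("%9zu -> %9zu\n", sizes[i], good_alloc_size(sizes[i]));
  }
  return 0;
}
```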
+bool _mi_os_secure_guard_page_set_before(void* addr, mi_memid_t memid) { + return _mi_os_secure_guard_page_set_at((uint8_t*)addr - _mi_os_secure_guard_page_size(), memid); +} + +// In secure mode, try to recommit an area +bool _mi_os_secure_guard_page_reset_at(void* addr, mi_memid_t memid) { + if (addr == NULL) return true; + #if MI_SECURE > 0 + if (!memid.is_pinned) { + mi_arena_t* const arena = mi_memid_arena(memid); + if (arena != NULL && arena->commit_fun != NULL) { + return (*(arena->commit_fun))(true, addr, _mi_os_secure_guard_page_size(), NULL, arena->commit_fun_arg); + } + else { + return _mi_os_commit(addr, _mi_os_secure_guard_page_size(), NULL); + } + } + #else + MI_UNUSED(memid); + #endif + return true; +} + +// In secure mode, try to recommit an area +bool _mi_os_secure_guard_page_reset_before(void* addr, mi_memid_t memid) { + return _mi_os_secure_guard_page_reset_at((uint8_t*)addr - _mi_os_secure_guard_page_size(), memid); +} + + +/* ----------------------------------------------------------- + Free memory +-------------------------------------------------------------- */ + +static void mi_os_free_huge_os_pages(void* p, size_t size, mi_subproc_t* subproc); + +static void mi_os_prim_free(void* addr, size_t size, size_t commit_size, mi_subproc_t* subproc) { + mi_assert_internal((size % _mi_os_page_size()) == 0); + if (addr == NULL) return; // || _mi_os_is_huge_reserved(addr) + int err = _mi_prim_free(addr, size); // allow size==0 (issue #1041) + if (err != 0) { + _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); + } + if (subproc == NULL) { subproc = _mi_subproc(); } // from `mi_arenas_unsafe_destroy` we pass subproc_main explicitly as we can no longer use the theap pointer + if (commit_size > 0) { + mi_subproc_stat_decrease(subproc, committed, commit_size); + } + mi_subproc_stat_decrease(subproc, reserved, size); +} + +void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_subproc_t* subproc /* can be NULL */) { + if (mi_memkind_is_os(memid.memkind)) { + size_t csize = memid.mem.os.size; + if (csize==0) { csize = _mi_os_good_alloc_size(size); } + mi_assert_internal(csize >= size); + size_t commit_size = (still_committed ? csize : 0); + void* base = addr; + // different base? (due to alignment) + if (memid.mem.os.base != base) { + mi_assert(memid.mem.os.base <= addr); + base = memid.mem.os.base; + const size_t diff = (uint8_t*)addr - (uint8_t*)memid.mem.os.base; + if (memid.mem.os.size==0) { + csize += diff; + } + if (still_committed) { + commit_size -= diff; // the (addr-base) part was already un-committed + } + } + // free it + if (memid.memkind == MI_MEM_OS_HUGE) { + mi_assert(memid.is_pinned); + mi_os_free_huge_os_pages(base, csize, subproc); + } + else { + mi_os_prim_free(base, csize, (still_committed ? commit_size : 0), subproc); + } + } + else { + // nothing to do + mi_assert(memid.memkind < MI_MEM_OS); + } +} + +void _mi_os_free(void* p, size_t size, mi_memid_t memid) { + _mi_os_free_ex(p, size, true, memid, NULL); +} + + +/* ----------------------------------------------------------- + Primitive allocation from the OS. +-------------------------------------------------------------- */ + +// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. +// Also `hint_addr` is a hint and may be ignored. 
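The aligned allocation path that follows over-allocates and then trims the unaligned head and tail when the platform allows freeing inside a mapping. The standalone sketch below illustrates that strategy with raw POSIX `mmap`/`munmap` (it is not mimalloc's implementation); it assumes a Linux-like system and that `size` and `alignment` are multiples of the OS page size, and the function name is invented.

```c
#define _DEFAULT_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>

// Map size+alignment bytes, then unmap the unaligned head and tail; munmap
// may free any page-aligned sub-range of an existing anonymous mapping.
static void* alloc_aligned_mmap(size_t size, size_t alignment) {
  const size_t over_size = size + alignment;
  uint8_t* p = (uint8_t*)mmap(NULL, over_size, PROT_READ | PROT_WRITE,
                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) return NULL;
  uint8_t* aligned = (uint8_t*)(((uintptr_t)p + alignment - 1) & ~(uintptr_t)(alignment - 1));
  const size_t pre_size  = (size_t)(aligned - p);
  const size_t post_size = over_size - pre_size - size;
  if (pre_size  > 0) { munmap(p, pre_size); }               // trim the head
  if (post_size > 0) { munmap(aligned + size, post_size); } // trim the tail
  return aligned;
}

int main(void) {
  void* p = alloc_aligned_mmap(1u << 20, 1u << 21);  // 1 MiB block, 2 MiB aligned
  printf("aligned block at %p\n", p);
  if (p != NULL) { munmap(p, 1u << 20); }
  return 0;
}
```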
+static void* mi_os_prim_alloc_at(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero) { + mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + mi_assert_internal(is_zero != NULL); + mi_assert_internal(is_large != NULL); + if (size == 0) return NULL; + if (!commit) { allow_large = false; } + if (try_alignment == 0) { try_alignment = 1; } // avoid 0 to ensure there will be no divide by zero when aligning + *is_zero = false; + void* p = NULL; + int err = _mi_prim_alloc(hint_addr, size, try_alignment, commit, allow_large, is_large, is_zero, &p); + if (err != 0) { + _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), addr: %p, size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, hint_addr, size, try_alignment, commit, allow_large); + } + + mi_os_stat_counter_increase(mmap_calls, 1); + if (p != NULL) { + mi_os_stat_increase(reserved, size); + if (commit) { + mi_os_stat_increase(committed, size); + // seems needed for asan (or `mimalloc-test-api` fails) + #ifdef MI_TRACK_ASAN + if (*is_zero) { mi_track_mem_defined(p,size); } + else { mi_track_mem_undefined(p,size); } + #endif + } + } + return p; +} + +static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero) { + return mi_os_prim_alloc_at(NULL, size, try_alignment, commit, allow_large, is_large, is_zero); +} + + +// Primitive aligned allocation from the OS. +// This function guarantees the allocated memory is aligned. +static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** base) { + mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0)); + mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + mi_assert_internal(is_large != NULL); + mi_assert_internal(is_zero != NULL); + mi_assert_internal(base != NULL); + if (!commit) allow_large = false; + if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL; + size = _mi_align_up(size, _mi_os_page_size()); + + // try a direct allocation if the alignment is below the default, or if larger than 1/8 fraction of the size. + const bool try_direct_alloc = (alignment <= mi_os_mem_config.alloc_granularity || alignment > size/8); + + void* p = NULL; + if (try_direct_alloc) { + p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero); + } + + // aligned already? + if (p != NULL && ((uintptr_t)p % alignment) == 0) { + *base = p; + } + else { + // if not aligned, free it, overallocate, and unmap around it + #if !MI_TRACK_ASAN + if (try_direct_alloc) { + _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit); + } + #endif + if (p != NULL) { mi_os_prim_free(p, size, (commit ? size : 0), NULL); } + if (size >= (SIZE_MAX - alignment)) return NULL; // overflow + const size_t over_size = size + alignment; + + if (!mi_os_mem_config.has_partial_free) { // win32 virtualAlloc cannot free parts of an allocated block + // over-allocate uncommitted (virtual) memory + p = mi_os_prim_alloc(over_size, 1 /*alignment*/, false /* commit? 
*/, false /* allow_large */, is_large, is_zero); + if (p == NULL) return NULL; + + // set p to the aligned part in the full region + // note: on Windows VirtualFree needs the actual base pointer + // this is handledby having the `base` field in the memid. + *base = p; // remember the base + p = _mi_align_up_ptr(p, alignment); + + // explicitly commit only the aligned part + if (commit) { + if (!_mi_os_commit(p, size, NULL)) { + mi_os_prim_free(*base, over_size, 0, NULL); + return NULL; + } + } + } + else { // mmap can free inside an allocation + // overallocate... + p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero); + if (p == NULL) return NULL; + + // and selectively unmap parts around the over-allocated area. + void* aligned_p = _mi_align_up_ptr(p, alignment); + size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p; + size_t mid_size = _mi_align_up(size, _mi_os_page_size()); + size_t post_size = over_size - pre_size - mid_size; + mi_assert_internal(pre_size < over_size&& post_size < over_size&& mid_size >= size); + if (pre_size > 0) { mi_os_prim_free(p, pre_size, (commit ? pre_size : 0), NULL); } + if (post_size > 0) { mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, (commit ? post_size : 0), NULL); } + // we can return the aligned pointer on `mmap` systems + p = aligned_p; + *base = aligned_p; // since we freed the pre part, `*base == p`. + } + } + + mi_assert_internal(p == NULL || (p != NULL && *base != NULL && ((uintptr_t)p % alignment) == 0)); + return p; +} + + +/* ----------------------------------------------------------- + OS API: alloc and alloc_aligned +----------------------------------------------------------- */ + +void* _mi_os_alloc(size_t size, mi_memid_t* memid) { + *memid = _mi_memid_none(); + if (size == 0) return NULL; + size = _mi_os_good_alloc_size(size); + bool os_is_large = false; + bool os_is_zero = false; + void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero); + if (p == NULL) return NULL; + + *memid = _mi_memid_create_os(p, size, true, os_is_zero, os_is_large); + mi_assert_internal(memid->mem.os.size >= size); + mi_assert_internal(memid->initially_committed); + return p; +} + +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid) +{ + MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings + *memid = _mi_memid_none(); + if (size == 0) return NULL; + size = _mi_os_good_alloc_size(size); + alignment = _mi_align_up(alignment, _mi_os_page_size()); + + bool os_is_large = false; + bool os_is_zero = false; + void* os_base = NULL; + void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base ); + if (p == NULL) return NULL; + + *memid = _mi_memid_create_os(p, size, commit, os_is_zero, os_is_large); + memid->mem.os.base = os_base; + memid->mem.os.size += ((uint8_t*)p - (uint8_t*)os_base); // todo: return from prim_alloc_aligned? 
+ + mi_assert_internal(memid->mem.os.size >= size); + mi_assert_internal(_mi_is_aligned(p,alignment)); + if (commit) { mi_assert_internal(memid->initially_committed); } + return p; +} + + +mi_decl_nodiscard static void* mi_os_ensure_zero(void* p, size_t size, mi_memid_t* memid) { + if (p==NULL || size==0) return p; + // ensure committed + if (!memid->initially_committed) { + bool is_zero = false; + if (!_mi_os_commit(p, size, &is_zero)) { + _mi_os_free(p, size, *memid); + return NULL; + } + memid->initially_committed = true; + } + // ensure zero'd + if (memid->initially_zero) return p; + _mi_memzero_aligned(p,size); + memid->initially_zero = true; + return p; +} + +void* _mi_os_zalloc(size_t size, mi_memid_t* memid) { + void* p = _mi_os_alloc(size,memid); + return mi_os_ensure_zero(p, size, memid); +} + +/* ----------------------------------------------------------- + OS aligned allocation with an offset. This is used + for large alignments > MI_BLOCK_ALIGNMENT_MAX. We use a large mimalloc + page where the object can be aligned at an offset from the start of the segment. + As we may need to overallocate, we need to free such pointers using `mi_free_aligned` + to use the actual start of the memory region. +----------------------------------------------------------- */ + +void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid) { + mi_assert(offset <= size); + mi_assert((alignment % _mi_os_page_size()) == 0); + *memid = _mi_memid_none(); + if (offset == 0) { + // regular aligned allocation + return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid); + } + else { + // overallocate to align at an offset + const size_t extra = _mi_align_up(offset, alignment) - offset; + const size_t oversize = size + extra; + void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid); + if (start == NULL) return NULL; + + void* const p = (uint8_t*)start + extra; + mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment)); + // decommit the overallocation at the start + if (commit && extra > _mi_os_page_size()) { + _mi_os_decommit(start, extra); + } + return p; + } +} + +/* ----------------------------------------------------------- + OS memory API: reset, commit, decommit, protect, unprotect. +----------------------------------------------------------- */ + +// OS page align within a given area, either conservative (pages inside the area only), +// or not (straddling pages outside the area is possible) +static void* mi_os_page_align_areax(bool conservative, void* addr, size_t size, size_t* newsize) { + mi_assert(addr != NULL && size > 0); + if (newsize != NULL) *newsize = 0; + if (size == 0 || addr == NULL) return NULL; + + // page align conservatively within the range, or liberally straddling pages outside the range + void* start = (conservative ? _mi_align_up_ptr(addr, _mi_os_page_size()) + : mi_align_down_ptr(addr, _mi_os_page_size())); + void* end = (conservative ? 
mi_align_down_ptr((uint8_t*)addr + size, _mi_os_page_size()) + : _mi_align_up_ptr((uint8_t*)addr + size, _mi_os_page_size())); + ptrdiff_t diff = (uint8_t*)end - (uint8_t*)start; + if (diff <= 0) return NULL; + + mi_assert_internal((conservative && (size_t)diff <= size) || (!conservative && (size_t)diff >= size)); + if (newsize != NULL) *newsize = (size_t)diff; + return start; +} + +static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* newsize) { + return mi_os_page_align_areax(true, addr, size, newsize); +} + +bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size) { + if (is_zero != NULL) { *is_zero = false; } + mi_os_stat_counter_increase(commit_calls, 1); + + // page align range + size_t csize; + void* start = mi_os_page_align_areax(false /* conservative? */, addr, size, &csize); + if (csize == 0) return true; + + // commit + bool os_is_zero = false; + int err = _mi_prim_commit(start, csize, &os_is_zero); + if (err != 0) { + _mi_warning_message("cannot commit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); + return false; + } + if (os_is_zero && is_zero != NULL) { + *is_zero = true; + mi_assert_expensive(mi_mem_is_zero(start, csize)); + } + // note: the following seems required for asan (otherwise `mimalloc-test-stress` fails) + #ifdef MI_TRACK_ASAN + if (os_is_zero) { mi_track_mem_defined(start,csize); } + else { mi_track_mem_undefined(start,csize); } + #endif + mi_os_stat_increase(committed, stat_size); // use size for precise commit vs. decommit + return true; +} + +bool _mi_os_commit(void* addr, size_t size, bool* is_zero) { + return _mi_os_commit_ex(addr, size, is_zero, size); +} + +static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, size_t stat_size) { + mi_assert_internal(needs_recommit!=NULL); + mi_os_stat_decrease(committed, stat_size); + + // page align + size_t csize; + void* start = mi_os_page_align_area_conservative(addr, size, &csize); + if (csize == 0) return true; + + // decommit + *needs_recommit = true; + int err = _mi_prim_decommit(start,csize,needs_recommit); + if (err != 0) { + _mi_warning_message("cannot decommit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); + } + mi_assert_internal(err == 0); + return (err == 0); +} + +bool _mi_os_decommit(void* addr, size_t size) { + bool needs_recommit; + return mi_os_decommit_ex(addr, size, &needs_recommit, size); +} + + +// Signal to the OS that the address range is no longer in use +// but may be used later again. This will release physical memory +// pages and reduce swapping while keeping the memory committed. +// We page align to a conservative area inside the range to reset. 
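To make the reset-versus-decommit distinction above concrete, here is a Linux-flavoured standalone sketch (not mimalloc's code): a "reset" releases physical pages while the range stays readable and writable, whereas a "decommit" additionally drops access until the range is made accessible again.

```c
#define _DEFAULT_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void) {
  const size_t size = 1u << 20;
  char* p = (char*)mmap(NULL, size, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) return 1;
  memset(p, 1, size);                       // touch: pages become resident

  // "reset": physical pages may be reclaimed, but the mapping stays usable;
  // on Linux anonymous memory the next access simply sees zero pages.
  madvise(p, size, MADV_DONTNEED);
  printf("after reset, first byte = %d\n", p[0]);   // prints 0

  // "decommit": also remove access; touching the range now would fault.
  mprotect(p, size, PROT_NONE);
  // "recommit" before reuse.
  mprotect(p, size, PROT_READ | PROT_WRITE);
  p[0] = 42;

  munmap(p, size);
  return 0;
}
```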
+bool _mi_os_reset(void* addr, size_t size) { + // page align conservatively within the range + size_t csize; + void* start = mi_os_page_align_area_conservative(addr, size, &csize); + if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) + mi_os_stat_counter_increase(reset, csize); + mi_os_stat_counter_increase(reset_calls, 1); + + #if (MI_DEBUG>1) && !MI_SECURE && !MI_TRACK_ENABLED // && !MI_TSAN + memset(start, 0, csize); // pretend it is eagerly reset + #endif + + int err = _mi_prim_reset(start, csize); + if (err != 0) { + _mi_warning_message("cannot reset OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); + } + return (err == 0); +} + + +void _mi_os_reuse( void* addr, size_t size ) { + // page align conservatively within the range + size_t csize = 0; + void* const start = mi_os_page_align_area_conservative(addr, size, &csize); + if (csize == 0) return; + const int err = _mi_prim_reuse(start, csize); + if (err != 0) { + _mi_warning_message("cannot reuse OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); + } +} + +// either resets or decommits memory, returns true if the memory needs +// to be recommitted if it is to be re-used later on. +bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size, mi_commit_fun_t* commit_fun, void* commit_fun_arg) +{ + if (mi_option_get(mi_option_purge_delay) < 0) return false; // is purging allowed? + mi_os_stat_counter_increase(purge_calls, 1); + mi_os_stat_counter_increase(purged, size); + + if (commit_fun != NULL) { + bool decommitted = (*commit_fun)(false, p, size, NULL, commit_fun_arg); + return decommitted; // needs_recommit? + } + else if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit? + !_mi_preloading()) // don't decommit during preloading (unsafe) + { + bool needs_recommit = true; + mi_os_decommit_ex(p, size, &needs_recommit, stat_size); + return needs_recommit; + } + else { + if (allow_reset) { // this can sometimes be not allowed if the range is not fully committed (on Windows, we cannot reset uncommitted memory) + _mi_os_reset(p, size); + } + return false; // needs no recommit + } +} + +// either resets or decommits memory, returns true if the memory needs +// to be recommitted if it is to be re-used later on. +bool _mi_os_purge(void* p, size_t size) { + return _mi_os_purge_ex(p, size, true, size, NULL, NULL); +} + + +// Protect a region in memory to be not accessible. +static bool mi_os_protectx(void* addr, size_t size, bool protect) { + // page align conservatively within the range + size_t csize = 0; + void* start = mi_os_page_align_area_conservative(addr, size, &csize); + if (csize == 0) return false; + /* + if (_mi_os_is_huge_reserved(addr)) { + _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); + } + */ + int err = _mi_prim_protect(start,csize,protect); + if (err != 0) { + _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", (protect ? "protect" : "unprotect"), err, err, start, csize); + } + return (err == 0); +} + +bool _mi_os_protect(void* addr, size_t size) { + return mi_os_protectx(addr, size, true); +} + +bool _mi_os_unprotect(void* addr, size_t size) { + return mi_os_protectx(addr, size, false); +} + + + +/* ---------------------------------------------------------------------------- +Support for allocating huge OS pages (1Gib) that are reserved up-front +and possibly associated with a specific NUMA node. 
(use `numa_node>=0`) +-----------------------------------------------------------------------------*/ +#define MI_HUGE_OS_PAGE_SIZE (MI_GiB) + + +#if (MI_INTPTR_SIZE >= 8) +// To ensure proper alignment, use our own area for huge OS pages +static mi_decl_cache_align _Atomic(uintptr_t) mi_huge_start; // = 0 + +// Claim an aligned address range for huge pages +static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { + if (total_size != NULL) *total_size = 0; + const size_t size = pages * MI_HUGE_OS_PAGE_SIZE; + + uintptr_t start = 0; + uintptr_t end = 0; + uintptr_t huge_start = mi_atomic_load_relaxed(&mi_huge_start); + do { + start = huge_start; + if (start == 0) { + // Initialize the start address after the 32TiB area + start = ((uintptr_t)8 << 40); // 8TiB virtual start address + #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode + uintptr_t r = _mi_theap_random_next(_mi_theap_default()); + start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF)); // (randomly 12bits)*1GiB == between 0 to 4TiB + #endif + } + end = start + size; + } while (!mi_atomic_cas_weak_acq_rel(&mi_huge_start, &huge_start, end)); + + if (total_size != NULL) *total_size = size; + return (uint8_t*)start; +} +#else +static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { + MI_UNUSED(pages); + if (total_size != NULL) *total_size = 0; + return NULL; +} +#endif + +// Allocate MI_ARENA_SLICE_ALIGN aligned huge pages +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid) { + *memid = _mi_memid_none(); + if (psize != NULL) *psize = 0; + if (pages_reserved != NULL) *pages_reserved = 0; + size_t size = 0; + uint8_t* const start = mi_os_claim_huge_pages(pages, &size); + if (start == NULL) return NULL; // or 32-bit systems + + // Allocate one page at the time but try to place them contiguously + // We allocate one page at the time to be able to abort if it takes too long + // or to at least allocate as many as available on the system. + mi_msecs_t start_t = _mi_clock_start(); + size_t page = 0; + bool all_zero = true; + while (page < pages) { + // allocate a page + bool is_zero = false; + void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); + void* p = NULL; + int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zero, &p); + if (!is_zero) { all_zero = false; } + if (err != 0) { + _mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: %zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE); + break; + } + + // Did we succeed at a contiguous address? 
+ if (p != addr) { + // no success, issue a warning and break + if (p != NULL) { + _mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr); + mi_os_prim_free(p, MI_HUGE_OS_PAGE_SIZE, MI_HUGE_OS_PAGE_SIZE, NULL); + } + break; + } + + // success, record it + page++; // increase before timeout check (see issue #711) + mi_os_stat_increase(committed, MI_HUGE_OS_PAGE_SIZE); + mi_os_stat_increase(reserved, MI_HUGE_OS_PAGE_SIZE); + + // check for timeout + if (max_msecs > 0) { + mi_msecs_t elapsed = _mi_clock_end(start_t); + if (page >= 1) { + mi_msecs_t estimate = ((elapsed / (page+1)) * pages); + if (estimate > 2*max_msecs) { // seems like we are going to timeout, break + elapsed = max_msecs + 1; + } + } + if (elapsed > max_msecs) { + _mi_warning_message("huge OS page allocation timed out (after allocating %zu page(s))\n", page); + break; + } + } + } + mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); + if (pages_reserved != NULL) { *pages_reserved = page; } + if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; } + if (page != 0) { + mi_assert(start != NULL); + *memid = _mi_memid_create_os(start, size, true /* is committed */, all_zero, true /* is_large */); + memid->memkind = MI_MEM_OS_HUGE; + mi_assert(memid->is_pinned); + #ifdef MI_TRACK_ASAN + if (all_zero) { mi_track_mem_defined(start,size); } + #endif + } + return (page == 0 ? NULL : start); +} + +// free every huge page in a range individually (as we allocated per page) +// note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. +static void mi_os_free_huge_os_pages(void* p, size_t size, mi_subproc_t* subproc) { + if (p==NULL || size==0) return; + uint8_t* base = (uint8_t*)p; + while (size >= MI_HUGE_OS_PAGE_SIZE) { + mi_os_prim_free(base, MI_HUGE_OS_PAGE_SIZE, MI_HUGE_OS_PAGE_SIZE, subproc); + size -= MI_HUGE_OS_PAGE_SIZE; + base += MI_HUGE_OS_PAGE_SIZE; + } +} + + +/* ---------------------------------------------------------------------------- +Support NUMA aware allocation +-----------------------------------------------------------------------------*/ + +static _Atomic(size_t) mi_numa_node_count; // = 0 // cache the node count + +int _mi_os_numa_node_count(void) { + size_t count = mi_atomic_load_acquire(&mi_numa_node_count); + if mi_unlikely(count == 0) { + long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly? + if (ncount > 0 && ncount < INT_MAX) { + count = (size_t)ncount; + } + else { + const size_t n = _mi_prim_numa_node_count(); // or detect dynamically + if (n == 0 || n > INT_MAX) { count = 1; } + else { count = n; } + } + mi_atomic_store_release(&mi_numa_node_count, count); // save it + _mi_verbose_message("using %zd numa regions\n", count); + } + mi_assert_internal(count > 0 && count <= INT_MAX); + return (int)count; +} + +static int mi_os_numa_node_get(void) { + int numa_count = _mi_os_numa_node_count(); + if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 + // never more than the node count and >= 0 + const size_t n = _mi_prim_numa_node(); + int numa_node = (n < INT_MAX ? 
(int)n : 0); + if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } + return numa_node; +} + +int _mi_os_numa_node(void) { + if mi_likely(mi_atomic_load_relaxed(&mi_numa_node_count) == 1) { + return 0; + } + else { + return mi_os_numa_node_get(); + } +} + + +/* ---------------------------------------------------------------------------- + Public API +-----------------------------------------------------------------------------*/ +#if 0 +mi_decl_export void* mi_os_alloc(size_t size, bool commit, size_t* full_size) { + return mi_os_alloc_aligned(size, mi_os_mem_config.alloc_granularity, commit, NULL, full_size); +} + +static void* mi_os_alloc_aligned_ex(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_committed, bool* is_pinned, void** base, size_t* full_size) { + mi_memid_t memid = _mi_memid_none(); + void* p = _mi_os_alloc_aligned(size, alignment, commit, allow_large, &memid); + if (p == NULL) return p; + if (is_committed != NULL) { *is_committed = memid.initially_committed; } + if (is_pinned != NULL) { *is_pinned = memid.is_pinned; } + if (base != NULL) { *base = memid.mem.os.base; } + if (full_size != NULL) { *full_size = memid.mem.os.size; } + if (!memid.initially_zero && memid.initially_committed) { + _mi_memzero_aligned(memid.mem.os.base, memid.mem.os.size); + } + return p; +} + +mi_decl_export void* mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, void** base, size_t* full_size) { + return mi_os_alloc_aligned_ex(size, alignment, commit, false, NULL, NULL, base, full_size); +} + +mi_decl_export void* mi_os_alloc_aligned_allow_large(size_t size, size_t alignment, bool commit, bool* is_committed, bool* is_pinned, void** base, size_t* full_size) { + return mi_os_alloc_aligned_ex(size, alignment, commit, true, is_committed, is_pinned, base, full_size); +} + +mi_decl_export void mi_os_free(void* p, size_t size) { + if (p==NULL || size == 0) return; + mi_memid_t memid = _mi_memid_create_os(p, size, true, false, false); + _mi_os_free(p, size, memid); +} + +mi_decl_export void mi_os_commit(void* p, size_t size) { + _mi_os_commit(p, size, NULL); +} + +mi_decl_export void mi_os_decommit(void* p, size_t size) { + _mi_os_decommit(p, size); +} +#endif diff --git a/3rd/mimalloc-3.2.6/src/page-map.c b/3rd/mimalloc-3.2.6/src/page-map.c new file mode 100644 index 00000000..f0ca4459 --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/page-map.c @@ -0,0 +1,429 @@ +/*---------------------------------------------------------------------------- +Copyright (c) 2023-2025, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "bitmap.h" + +static void mi_page_map_cannot_commit(void) { + _mi_warning_message("unable to commit the allocation page-map on-demand\n" ); +} + +#if MI_PAGE_MAP_FLAT + +// The page-map contains a byte for each 64kb slice in the address space. +// For an address `a` where `ofs = _mi_page_map[a >> 16]`: +// 0 = unused +// 1 = the slice at `a & ~0xFFFF` is a mimalloc page. +// 1 < ofs <= 127 = the slice is part of a page, starting at `(((a>>16) - ofs - 1) << 16)`. +// +// 1 byte per slice => 1 TiB address space needs a 2^14 * 2^16 = 16 MiB page map. +// A full 256 TiB address space (48 bit) needs a 4 GiB page map. 
+// A full 4 GiB address space (32 bit) needs only a 64 KiB page map. + +mi_decl_cache_align uint8_t* _mi_page_map = NULL; +static void* mi_page_map_max_address = NULL; +static mi_memid_t mi_page_map_memid; + +#define MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT MI_ARENA_SLICE_SIZE +static mi_bitmap_t* mi_page_map_commit; // one bit per committed 64 KiB entries + +mi_decl_nodiscard static bool mi_page_map_ensure_committed(size_t idx, size_t slice_count); + +bool _mi_page_map_init(void) { + size_t vbits = (size_t)mi_option_get_clamp(mi_option_max_vabits, 0, MI_SIZE_BITS); + if (vbits == 0) { + vbits = _mi_os_virtual_address_bits(); + #if MI_ARCH_X64 // canonical address is limited to the first 128 TiB + if (vbits >= 48) { vbits = 47; } + #endif + } + + // Allocate the page map and commit bits + mi_page_map_max_address = (void*)(vbits >= MI_SIZE_BITS ? (SIZE_MAX - MI_ARENA_SLICE_SIZE + 1) : (MI_PU(1) << vbits)); + const size_t page_map_size = (MI_ZU(1) << (vbits - MI_ARENA_SLICE_SHIFT)); + const bool commit = (page_map_size <= 1*MI_MiB || mi_option_is_enabled(mi_option_pagemap_commit)); // _mi_os_has_overcommit(); // commit on-access on Linux systems? + const size_t commit_bits = _mi_divide_up(page_map_size, MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT); + const size_t bitmap_size = (commit ? 0 : mi_bitmap_size(commit_bits, NULL)); + const size_t reserve_size = bitmap_size + page_map_size; + uint8_t* const base = (uint8_t*)_mi_os_alloc_aligned(reserve_size, 1, commit, true /* allow large */, &mi_page_map_memid); + if (base==NULL) { + _mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB); + return false; + } + if (mi_page_map_memid.initially_committed && !mi_page_map_memid.initially_zero) { + _mi_warning_message("internal: the page map was committed but not zero initialized!\n"); + _mi_memzero_aligned(base, reserve_size); + } + if (bitmap_size > 0) { + mi_page_map_commit = (mi_bitmap_t*)base; + if (!_mi_os_commit(mi_page_map_commit, bitmap_size, NULL)) { + mi_page_map_cannot_commit(); + return false; + } + mi_bitmap_init(mi_page_map_commit, commit_bits, true); + } + _mi_page_map = base + bitmap_size; + + // commit the first part so NULL pointers get resolved without an access violation + if (!commit) { + if (!mi_page_map_ensure_committed(0, 1)) { + mi_page_map_cannot_commit(); + return false; + } + } + _mi_page_map[0] = 1; // so _mi_ptr_page(NULL) == NULL + mi_assert_internal(_mi_ptr_page(NULL)==NULL); + return true; +} + +void _mi_page_map_unsafe_destroy(mi_subproc_t* subproc) { + mi_assert_internal(subproc != NULL); + mi_assert_internal(_mi_page_map != NULL); + if (_mi_page_map == NULL) return; + _mi_os_free_ex(mi_page_map_memid.mem.os.base, mi_page_map_memid.mem.os.size, true, mi_page_map_memid, subproc); + _mi_page_map = NULL; + mi_page_map_commit = NULL; + mi_page_map_max_address = NULL; + mi_page_map_memid = _mi_memid_none(); +} + + +static bool mi_page_map_ensure_committed(size_t idx, size_t slice_count) { + // is the page map area that contains the page address committed? + // we always set the commit bits so we can track what ranges are in-use. + // we only actually commit if the map wasn't committed fully already. 
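+  // [Editorial note, not part of upstream: each bit in `mi_page_map_commit` covers
+  //  MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT (= MI_ARENA_SLICE_SIZE) bytes of the map;
+  //  assuming 64 KiB slices as described above, committing one bit's worth (64 KiB
+  //  of map) makes roughly 4 GiB of address space resolvable, so the map is
+  //  committed on demand in coarse chunks.]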
+ if (mi_page_map_commit != NULL) { + const size_t commit_idx = idx / MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT; + const size_t commit_idx_hi = (idx + slice_count - 1) / MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT; + for (size_t i = commit_idx; i <= commit_idx_hi; i++) { // per bit to avoid crossing over bitmap chunks + if (mi_bitmap_is_clear(mi_page_map_commit, i)) { + // this may race, in which case we do multiple commits (which is ok) + bool is_zero; + uint8_t* const start = _mi_page_map + (i * MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT); + const size_t size = MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT; + if (!_mi_os_commit(start, size, &is_zero)) { + mi_page_map_cannot_commit(); + return false; + } + if (!is_zero && !mi_page_map_memid.initially_zero) { _mi_memzero(start, size); } + mi_bitmap_set(mi_page_map_commit, i); + } + } + } + #if MI_DEBUG > 0 + _mi_page_map[idx] = 0; + _mi_page_map[idx+slice_count-1] = 0; + #endif + return true; +} + + +static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t* slice_count) { + size_t page_size; + *page_start = mi_page_area(page, &page_size); + if (page_size > MI_LARGE_PAGE_SIZE) { page_size = MI_LARGE_PAGE_SIZE - MI_ARENA_SLICE_SIZE; } // furthest interior pointer + *slice_count = mi_slice_count_of_size(page_size) + (((uint8_t*)*page_start - (uint8_t*)page)/MI_ARENA_SLICE_SIZE); // add for large aligned blocks + return _mi_page_map_index(page); +} + +bool _mi_page_map_register(mi_page_t* page) { + mi_assert_internal(page != NULL); + mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); + mi_assert_internal(_mi_page_map != NULL); // should be initialized before multi-thread access! + if mi_unlikely(_mi_page_map == NULL) { + if (!_mi_page_map_init()) return false; + } + mi_assert(_mi_page_map!=NULL); + uint8_t* page_start; + size_t slice_count; + const size_t idx = mi_page_map_get_idx(page, &page_start, &slice_count); + + if (!mi_page_map_ensure_committed(idx, slice_count)) { + return false; + } + + // set the offsets + for (size_t i = 0; i < slice_count; i++) { + mi_assert_internal(i < 128); + _mi_page_map[idx + i] = (uint8_t)(i+1); + } + return true; +} + +void _mi_page_map_unregister(mi_page_t* page) { + mi_assert_internal(_mi_page_map != NULL); + // get index and count + uint8_t* page_start; + size_t slice_count; + const size_t idx = mi_page_map_get_idx(page, &page_start, &slice_count); + // unset the offsets + _mi_memzero(_mi_page_map + idx, slice_count); +} + +void _mi_page_map_unregister_range(void* start, size_t size) { + const size_t slice_count = _mi_divide_up(size, MI_ARENA_SLICE_SIZE); + const uintptr_t index = _mi_page_map_index(start); + // todo: scan the commit bits and clear only those ranges? 
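+  // [Editorial note, not part of upstream: the whole range is committed up front so
+  //  the `_mi_memzero` below cannot fault on map entries that were never committed,
+  //  at the cost of possibly committing chunks that were never used (see the todo
+  //  above).]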
+ if (!mi_page_map_ensure_committed(index, slice_count)) { // we commit the range in total; + return; + } + _mi_memzero(&_mi_page_map[index], slice_count); +} + + +mi_page_t* _mi_safe_ptr_page(const void* p) { + if mi_unlikely(p >= mi_page_map_max_address) return NULL; + const uintptr_t idx = _mi_page_map_index(p); + if mi_unlikely(mi_page_map_commit != NULL && !mi_bitmap_is_set(mi_page_map_commit, idx/MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT)) return NULL; + const uintptr_t ofs = _mi_page_map[idx]; + if mi_unlikely(ofs == 0) return NULL; + return (mi_page_t*)((((uintptr_t)p >> MI_ARENA_SLICE_SHIFT) - ofs + 1) << MI_ARENA_SLICE_SHIFT); +} + +mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { + return (_mi_safe_ptr_page(p) != NULL); +} + +#else + +// A 2-level page map +#define MI_PAGE_MAP_SUB_SIZE (MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*)) +#define MI_PAGE_MAP_ENTRIES_PER_CBIT (MI_PAGE_MAP_COUNT / MI_BFIELD_BITS) + +mi_decl_cache_align _Atomic(mi_submap_t)* _mi_page_map; +static size_t mi_page_map_count; +static void* mi_page_map_max_address; +static mi_memid_t mi_page_map_memid; + +// divide the main map in 64 (`MI_BFIELD_BITS`) parts commit those parts on demand +static _Atomic(mi_bfield_t) mi_page_map_commit; + +mi_decl_nodiscard static inline bool mi_page_map_is_committed(size_t idx, size_t* pbit_idx) { + mi_bfield_t commit = mi_atomic_load_relaxed(&mi_page_map_commit); + const size_t bit_idx = idx/MI_PAGE_MAP_ENTRIES_PER_CBIT; + mi_assert_internal(bit_idx < MI_BFIELD_BITS); + if (pbit_idx != NULL) { *pbit_idx = bit_idx; } + return ((commit & (MI_ZU(1) << bit_idx)) != 0); +} + +mi_decl_nodiscard static bool mi_page_map_ensure_committed(size_t idx, mi_submap_t* submap) { + mi_assert_internal(submap!=NULL && *submap==NULL); + size_t bit_idx; + if mi_unlikely(!mi_page_map_is_committed(idx, &bit_idx)) { + uint8_t* start = (uint8_t*)&_mi_page_map[bit_idx * MI_PAGE_MAP_ENTRIES_PER_CBIT]; + if (!_mi_os_commit(start, MI_PAGE_MAP_ENTRIES_PER_CBIT * sizeof(mi_submap_t), NULL)) { + mi_page_map_cannot_commit(); + return false; + } + mi_atomic_or_acq_rel(&mi_page_map_commit, MI_ZU(1) << bit_idx); + } + *submap = mi_atomic_load_ptr_acquire(mi_page_t*, &_mi_page_map[idx]); // acquire _mi_page_map_at(idx); + return true; +} + +// initialize the page map +bool _mi_page_map_init(void) { + size_t vbits = (size_t)mi_option_get_clamp(mi_option_max_vabits, 0, MI_SIZE_BITS); + if (vbits == 0) { + vbits = _mi_os_virtual_address_bits(); + #if MI_ARCH_X64 // canonical address is limited to the first 128 TiB + if (vbits >= 48) { vbits = 47; } + #endif + } + + // Allocate the page map and commit bits + mi_assert(MI_MAX_VABITS >= vbits); + mi_page_map_max_address = (void*)(vbits >= MI_SIZE_BITS ? 
(SIZE_MAX - MI_ARENA_SLICE_SIZE + 1) : (MI_PU(1) << vbits)); + mi_page_map_count = (MI_ZU(1) << (vbits - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT)); + mi_assert(mi_page_map_count <= MI_PAGE_MAP_COUNT); + const size_t os_page_size = _mi_os_page_size(); + const size_t page_map_size = _mi_align_up( mi_page_map_count * sizeof(mi_page_t**), os_page_size); + const size_t submap_size = MI_PAGE_MAP_SUB_SIZE; + const size_t reserve_size = page_map_size + submap_size; + #if MI_SECURE + const bool commit = true; // the whole page map is valid and we can reliably check any pointer + #else + const bool commit = page_map_size <= 64*MI_KiB || + mi_option_is_enabled(mi_option_pagemap_commit) || _mi_os_has_overcommit(); + #endif + _mi_page_map = (_Atomic(mi_page_t**)*)_mi_os_alloc_aligned(reserve_size, 1, commit, true /* allow large */, &mi_page_map_memid); + if (_mi_page_map==NULL) { + _mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB); + return false; + } + if (mi_page_map_memid.initially_committed && !mi_page_map_memid.initially_zero) { + _mi_warning_message("internal: the page map was committed but not zero initialized!\n"); + _mi_memzero_aligned(_mi_page_map, page_map_size); + } + mi_atomic_store_release(&mi_page_map_commit, (mi_page_map_memid.initially_committed ? ~MI_ZU(0) : MI_ZU(0))); + + // ensure there is a submap for the NULL address + mi_page_t** const sub0 = (mi_page_t**)((uint8_t*)_mi_page_map + page_map_size); // we reserved a submap part at the end already + if (!mi_page_map_memid.initially_committed) { + if (!_mi_os_commit(sub0, submap_size, NULL)) { // commit full submap (issue #1087) + mi_page_map_cannot_commit(); + return false; + } + } + if (!mi_page_map_memid.initially_zero) { // initialize low addresses with NULL + _mi_memzero_aligned(sub0, submap_size); + } + mi_submap_t nullsub = NULL; + if (!mi_page_map_ensure_committed(0,&nullsub)) { + mi_page_map_cannot_commit(); + return false; + } + mi_atomic_store_ptr_release(mi_page_t*, &_mi_page_map[0], sub0); + + mi_assert_internal(_mi_ptr_page(NULL)==NULL); + return true; +} + + +void _mi_page_map_unsafe_destroy(mi_subproc_t* subproc) { + mi_assert_internal(subproc != NULL); + mi_assert_internal(_mi_page_map != NULL); + if (_mi_page_map == NULL) return; + for (size_t idx = 1; idx < mi_page_map_count; idx++) { // skip entry 0 (as we allocate that submap at the end of the page_map) + // free all sub-maps + if (mi_page_map_is_committed(idx, NULL)) { + mi_submap_t sub = _mi_page_map_at(idx); + if (sub != NULL) { + mi_memid_t memid = _mi_memid_create_os(sub, MI_PAGE_MAP_SUB_SIZE, true, false, false); + _mi_os_free_ex(memid.mem.os.base, memid.mem.os.size, true, memid, subproc); + mi_atomic_store_ptr_release(mi_page_t*, &_mi_page_map[idx], NULL); + } + } + } + _mi_os_free_ex(_mi_page_map, mi_page_map_memid.mem.os.size, true, mi_page_map_memid, subproc); + _mi_page_map = NULL; + mi_page_map_count = 0; + mi_page_map_memid = _mi_memid_none(); + mi_page_map_max_address = NULL; + mi_atomic_store_release(&mi_page_map_commit, 0); +} + + +mi_decl_nodiscard static bool mi_page_map_ensure_submap_at(size_t idx, mi_submap_t* submap) { + mi_assert_internal(submap!=NULL && *submap==NULL); + mi_submap_t sub = NULL; + if (!mi_page_map_ensure_committed(idx, &sub)) { + return false; + } + if mi_unlikely(sub == NULL) { + // sub map not yet allocated, alloc now + mi_memid_t memid; + const size_t submap_size = MI_PAGE_MAP_SUB_SIZE; + sub = (mi_submap_t)_mi_os_zalloc(submap_size, &memid); + if 
(sub==NULL) { + _mi_warning_message("internal error: unable to extend the page map\n"); + return false; + } + mi_submap_t expect = NULL; + if (!mi_atomic_cas_ptr_strong_acq_rel(mi_page_t*, &_mi_page_map[idx], &expect, sub)) { + // another thread already allocated it.. free and continue + _mi_os_free(sub, submap_size, memid); + sub = expect; + } + } + mi_assert_internal(sub!=NULL); + *submap = sub; + return true; +} + +static bool mi_page_map_set_range_prim(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count) { + // is the page map area that contains the page address committed? + while (slice_count > 0) { + mi_submap_t sub = NULL; + if (!mi_page_map_ensure_submap_at(idx, &sub)) { + return false; + }; + mi_assert_internal(sub!=NULL); + // set the offsets for the page + while (slice_count > 0 && sub_idx < MI_PAGE_MAP_SUB_COUNT) { + sub[sub_idx] = page; + slice_count--; + sub_idx++; + } + idx++; // potentially wrap around to the next idx + sub_idx = 0; + } + return true; +} + +static bool mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count) { + if mi_unlikely(!mi_page_map_set_range_prim(page,idx,sub_idx,slice_count)) { + // failed to commit, call again to reset the page pointer if needed + if (page!=NULL) { + mi_page_map_set_range_prim(NULL,idx,sub_idx,slice_count); + } + return false; + } + return true; +} + +static size_t mi_page_map_get_idx(mi_page_t* page, size_t* sub_idx, size_t* slice_count) { + size_t page_size; + uint8_t* page_start = mi_page_area(page, &page_size); + if (page_size > MI_LARGE_PAGE_SIZE) { page_size = MI_LARGE_PAGE_SIZE - MI_ARENA_SLICE_SIZE; } // furthest interior pointer + *slice_count = mi_slice_count_of_size(page_size) + ((page_start - (uint8_t*)page)/MI_ARENA_SLICE_SIZE); // add for large aligned blocks + return _mi_page_map_index(page, sub_idx); +} + +bool _mi_page_map_register(mi_page_t* page) { + mi_assert_internal(page != NULL); + mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); + mi_assert_internal(_mi_page_map != NULL); // should be initialized before multi-thread access! + if mi_unlikely(_mi_page_map == NULL) { + if (!_mi_page_map_init()) return false; + } + mi_assert(_mi_page_map!=NULL); + size_t slice_count; + size_t sub_idx; + const size_t idx = mi_page_map_get_idx(page, &sub_idx, &slice_count); + return mi_page_map_set_range(page, idx, sub_idx, slice_count); +} + +void _mi_page_map_unregister(mi_page_t* page) { + mi_assert_internal(_mi_page_map != NULL); + mi_assert_internal(page != NULL); + mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); + if mi_unlikely(_mi_page_map == NULL) return; + // get index and count + size_t slice_count; + size_t sub_idx; + const size_t idx = mi_page_map_get_idx(page, &sub_idx, &slice_count); + // unset the offsets + mi_page_map_set_range(NULL, idx, sub_idx, slice_count); +} + +void _mi_page_map_unregister_range(void* start, size_t size) { + if mi_unlikely(_mi_page_map == NULL) return; + const size_t slice_count = _mi_divide_up(size, MI_ARENA_SLICE_SIZE); + size_t sub_idx; + const uintptr_t idx = _mi_page_map_index(start, &sub_idx); + mi_page_map_set_range(NULL, idx, sub_idx, slice_count); // todo: avoid committing if not already committed? 
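+  // [Editorial note, not part of upstream: `mi_page_map_set_range(NULL, ...)` goes
+  //  through `mi_page_map_ensure_submap_at`, so clearing a range that was never
+  //  registered may still commit (or allocate) sub-maps; the todo above presumably
+  //  refers to skipping entries whose commit bit is still clear.]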
+} + +// Return NULL for invalid pointers +mi_page_t* _mi_safe_ptr_page(const void* p) { + if (p==NULL) return NULL; + if mi_unlikely(p >= mi_page_map_max_address) return NULL; + size_t sub_idx; + const size_t idx = _mi_page_map_index(p,&sub_idx); + if mi_unlikely(!mi_page_map_is_committed(idx,NULL)) return NULL; + mi_page_t** const sub = _mi_page_map[idx]; + if mi_unlikely(sub==NULL) return NULL; + return sub[sub_idx]; +} + +mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { + return (_mi_safe_ptr_page(p) != NULL); +} + +#endif diff --git a/3rd/mimalloc-3.2.6/src/page-queue.c b/3rd/mimalloc-3.2.6/src/page-queue.c new file mode 100644 index 00000000..4c54cb9a --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/page-queue.c @@ -0,0 +1,455 @@ +/*---------------------------------------------------------------------------- +Copyright (c) 2018-2024, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ----------------------------------------------------------- + Definition of page queues for each block size +----------------------------------------------------------- */ + +#ifndef MI_IN_PAGE_C +#error "this file should be included from 'page.c'" +// include to help an IDE +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#endif + +/* ----------------------------------------------------------- + Minimal alignment in machine words (i.e. `sizeof(void*)`) +----------------------------------------------------------- */ + +#if (MI_MAX_ALIGN_SIZE > 4*MI_INTPTR_SIZE) + #error "define alignment for more than 4x word size for this platform" +#elif (MI_MAX_ALIGN_SIZE > 2*MI_INTPTR_SIZE) + #define MI_ALIGN4W // 4 machine words minimal alignment +#elif (MI_MAX_ALIGN_SIZE > MI_INTPTR_SIZE) + #define MI_ALIGN2W // 2 machine words minimal alignment +#else + // ok, default alignment is 1 word +#endif + + +/* ----------------------------------------------------------- + Queue query +----------------------------------------------------------- */ + + +static inline bool mi_page_queue_is_huge(const mi_page_queue_t* pq) { + return (pq->block_size == (MI_LARGE_MAX_OBJ_SIZE+sizeof(uintptr_t))); +} + +static inline bool mi_page_queue_is_full(const mi_page_queue_t* pq) { + return (pq->block_size == (MI_LARGE_MAX_OBJ_SIZE+(2*sizeof(uintptr_t)))); +} + +static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) { + return (pq->block_size > MI_LARGE_MAX_OBJ_SIZE); +} + +static inline size_t mi_page_queue_count(const mi_page_queue_t* pq) { + return pq->count; +} + +/* ----------------------------------------------------------- + Bins +----------------------------------------------------------- */ + +// Return the bin for a given field size. +// Returns MI_BIN_HUGE if the size is too large. +// We use `wsize` for the size in "machine word sizes", +// i.e. byte size == `wsize*sizeof(void*)`. +static mi_decl_noinline size_t mi_bin(size_t size) { + size_t wsize = _mi_wsize_from_size(size); +#if defined(MI_ALIGN4W) + if mi_likely(wsize <= 4) { + return (wsize <= 1 ? 1 : (wsize+1)&~1); // round to double word sizes + } +#elif defined(MI_ALIGN2W) + if mi_likely(wsize <= 8) { + return (wsize <= 1 ? 
1 : (wsize+1)&~1); // round to double word sizes + } +#else + if mi_likely(wsize <= 8) { + return (wsize == 0 ? 1 : wsize); + } +#endif + else if mi_unlikely(wsize > MI_LARGE_MAX_OBJ_WSIZE) { + return MI_BIN_HUGE; + } + else { + #if defined(MI_ALIGN4W) + if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes + #endif + wsize--; + // find the highest bit + const size_t b = (MI_SIZE_BITS - 1 - mi_clz(wsize)); // note: wsize != 0 + // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). + // - adjust with 3 because we use do not round the first 8 sizes + // which each get an exact bin + const size_t bin = ((b << 2) + ((wsize >> (b - 2)) & 0x03)) - 3; + mi_assert_internal(bin > 0 && bin < MI_BIN_HUGE); + return bin; + } +} + + + +/* ----------------------------------------------------------- + Queue of pages with free blocks +----------------------------------------------------------- */ + +size_t _mi_bin(size_t size) { + return mi_bin(size); +} + +size_t _mi_bin_size(size_t bin) { + mi_assert_internal(bin <= MI_BIN_HUGE); + return _mi_theap_empty.pages[bin].block_size; +} + +// Good size for allocation +mi_decl_nodiscard mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept { + if (size <= MI_LARGE_MAX_OBJ_SIZE) { + return _mi_bin_size(mi_bin(size + MI_PADDING_SIZE)); + } + else { + return _mi_align_up(size + MI_PADDING_SIZE,_mi_os_page_size()); + } +} + +#if (MI_DEBUG>1) +static bool mi_page_queue_contains(mi_page_queue_t* queue, const mi_page_t* page) { + mi_assert_internal(page != NULL); + mi_page_t* list = queue->first; + while (list != NULL) { + mi_assert_internal(list->next == NULL || list->next->prev == list); + mi_assert_internal(list->prev == NULL || list->prev->next == list); + if (list == page) break; + list = list->next; + } + return (list == page); +} + +#endif + +#if (MI_DEBUG>1) +static bool mi_theap_contains_queue(const mi_theap_t* theap, const mi_page_queue_t* pq) { + return (pq >= &theap->pages[0] && pq <= &theap->pages[MI_BIN_FULL]); +} +#endif + +bool _mi_page_queue_is_valid(mi_theap_t* theap, const mi_page_queue_t* pq) { + MI_UNUSED_RELEASE(theap); + if (pq==NULL) return false; + size_t count = 0; MI_UNUSED_RELEASE(count); + mi_page_t* prev = NULL; MI_UNUSED_RELEASE(prev); + for (mi_page_t* page = pq->first; page != NULL; page = page->next) { + mi_assert_internal(page->prev == prev); + if (mi_page_is_in_full(page)) { + mi_assert_internal(_mi_wsize_from_size(pq->block_size) == MI_LARGE_MAX_OBJ_WSIZE + 2); + } + else if (mi_page_is_huge(page)) { + mi_assert_internal(_mi_wsize_from_size(pq->block_size) == MI_LARGE_MAX_OBJ_WSIZE + 1); + } + else { + mi_assert_internal(mi_page_block_size(page) == pq->block_size); + } + mi_assert_internal(page->theap == theap); + if (page->next == NULL) { + mi_assert_internal(pq->last == page); + } + count++; + prev = page; + } + mi_assert_internal(pq->count == count); + return true; +} + +static size_t mi_page_bin(const mi_page_t* page) { + const size_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : (mi_page_is_huge(page) ? MI_BIN_HUGE : mi_bin(mi_page_block_size(page)))); + mi_assert_internal(bin <= MI_BIN_FULL); + return bin; +} + +// returns the page bin without using MI_BIN_FULL for statistics +size_t _mi_page_stats_bin(const mi_page_t* page) { + const size_t bin = (mi_page_is_huge(page) ? 
MI_BIN_HUGE : mi_bin(mi_page_block_size(page))); + mi_assert_internal(bin <= MI_BIN_HUGE); + return bin; +} + +static mi_page_queue_t* mi_theap_page_queue_of(mi_theap_t* theap, const mi_page_t* page) { + mi_assert_internal(theap!=NULL); + const size_t bin = mi_page_bin(page); + mi_page_queue_t* pq = &theap->pages[bin]; + mi_assert_internal((mi_page_block_size(page) == pq->block_size) || + (mi_page_is_huge(page) && mi_page_queue_is_huge(pq)) || + (mi_page_is_in_full(page) && mi_page_queue_is_full(pq))); + return pq; +} + +static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) { + mi_theap_t* theap = mi_page_theap(page); + mi_page_queue_t* pq = mi_theap_page_queue_of(theap, page); + mi_assert_expensive(mi_page_queue_contains(pq, page)); + return pq; +} + +// The current small page array is for efficiency and for each +// small size (up to 256) it points directly to the page for that +// size without having to compute the bin. This means when the +// current free page queue is updated for a small bin, we need to update a +// range of entries in `_mi_page_small_free`. +static inline void mi_theap_queue_first_update(mi_theap_t* theap, const mi_page_queue_t* pq) { + mi_assert_internal(mi_theap_contains_queue(theap,pq)); + size_t size = pq->block_size; + if (size > MI_SMALL_SIZE_MAX) return; + + mi_page_t* page = pq->first; + if (pq->first == NULL) page = (mi_page_t*)&_mi_page_empty; + + // find index in the right direct page array + size_t start; + size_t idx = _mi_wsize_from_size(size); + mi_page_t** pages_free = theap->pages_free_direct; + + if (pages_free[idx] == page) return; // already set + + // find start slot + if (idx<=1) { + start = 0; + } + else { + // find previous size; due to minimal alignment upto 3 previous bins may need to be skipped + size_t bin = mi_bin(size); + const mi_page_queue_t* prev = pq - 1; + while( bin == mi_bin(prev->block_size) && prev > &theap->pages[0]) { + prev--; + } + start = 1 + _mi_wsize_from_size(prev->block_size); + if (start > idx) start = idx; + } + + // set size range to the right page + mi_assert(start <= idx); + for (size_t sz = start; sz <= idx; sz++) { + pages_free[sz] = page; + } +} + +/* +static bool mi_page_queue_is_empty(mi_page_queue_t* queue) { + return (queue->first == NULL); +} +*/ + +static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { + mi_assert_internal(page != NULL); + mi_assert_expensive(mi_page_queue_contains(queue, page)); + mi_assert_internal(queue->count >= 1); + mi_assert_internal(mi_page_block_size(page) == queue->block_size || + (mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) || + (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + mi_theap_t* theap = mi_page_theap(page); + if (page->prev != NULL) page->prev->next = page->next; + if (page->next != NULL) page->next->prev = page->prev; + if (page == queue->last) queue->last = page->prev; + if (page == queue->first) { + queue->first = page->next; + // update first + mi_assert_internal(mi_theap_contains_queue(theap, queue)); + mi_theap_queue_first_update(theap,queue); + } + theap->page_count--; + queue->count--; + page->next = NULL; + page->prev = NULL; + mi_page_set_in_full(page,false); +} + + +static void mi_page_queue_push(mi_theap_t* theap, mi_page_queue_t* queue, mi_page_t* page) { + mi_assert_internal(mi_page_theap(page) == theap); + mi_assert_internal(!mi_page_queue_contains(queue, page)); + #if MI_HUGE_PAGE_ABANDON + mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); + #endif + 
mi_assert_internal(mi_page_block_size(page) == queue->block_size || + (mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) || + (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + + mi_page_set_in_full(page, mi_page_queue_is_full(queue)); + + page->next = queue->first; + page->prev = NULL; + if (queue->first != NULL) { + mi_assert_internal(queue->first->prev == NULL); + queue->first->prev = page; + queue->first = page; + } + else { + queue->first = queue->last = page; + } + queue->count++; + + // update direct + mi_theap_queue_first_update(theap, queue); + theap->page_count++; +} + +static void mi_page_queue_push_at_end(mi_theap_t* theap, mi_page_queue_t* queue, mi_page_t* page) { + mi_assert_internal(mi_page_theap(page) == theap); + mi_assert_internal(!mi_page_queue_contains(queue, page)); + + mi_assert_internal(mi_page_block_size(page) == queue->block_size || + (mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) || + (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + + mi_page_set_in_full(page, mi_page_queue_is_full(queue)); + + page->prev = queue->last; + page->next = NULL; + if (queue->last != NULL) { + mi_assert_internal(queue->last->next == NULL); + queue->last->next = page; + queue->last = page; + } + else { + queue->first = queue->last = page; + } + queue->count++; + + // update direct + if (queue->first == page) { + mi_theap_queue_first_update(theap, queue); + } + theap->page_count++; +} + +static void mi_page_queue_move_to_front(mi_theap_t* theap, mi_page_queue_t* queue, mi_page_t* page) { + mi_assert_internal(mi_page_theap(page) == theap); + mi_assert_internal(mi_page_queue_contains(queue, page)); + if (queue->first == page) return; + mi_page_queue_remove(queue, page); + mi_page_queue_push(theap, queue, page); + mi_assert_internal(queue->first == page); +} + +static void mi_page_queue_enqueue_from_ex(mi_page_queue_t* to, mi_page_queue_t* from, bool enqueue_at_end, mi_page_t* page) { + mi_assert_internal(page != NULL); + mi_assert_internal(from->count >= 1); + mi_assert_expensive(mi_page_queue_contains(from, page)); + mi_assert_expensive(!mi_page_queue_contains(to, page)); + const size_t bsize = mi_page_block_size(page); + MI_UNUSED(bsize); + mi_assert_internal((bsize == to->block_size && bsize == from->block_size) || + (bsize == to->block_size && mi_page_queue_is_full(from)) || + (bsize == from->block_size && mi_page_queue_is_full(to)) || + (mi_page_is_huge(page) && mi_page_queue_is_huge(to)) || + (mi_page_is_huge(page) && mi_page_queue_is_full(to))); + + mi_theap_t* theap = mi_page_theap(page); + + // delete from `from` + if (page->prev != NULL) page->prev->next = page->next; + if (page->next != NULL) page->next->prev = page->prev; + if (page == from->last) from->last = page->prev; + if (page == from->first) { + from->first = page->next; + // update first + mi_assert_internal(mi_theap_contains_queue(theap, from)); + mi_theap_queue_first_update(theap, from); + } + from->count--; + + // insert into `to` + to->count++; + if (enqueue_at_end) { + // enqueue at the end + page->prev = to->last; + page->next = NULL; + if (to->last != NULL) { + mi_assert_internal(theap == mi_page_theap(to->last)); + to->last->next = page; + to->last = page; + } + else { + to->first = page; + to->last = page; + mi_theap_queue_first_update(theap, to); + } + } + else { + if (to->first != NULL) { + // enqueue at 2nd place + mi_assert_internal(theap == mi_page_theap(to->first)); + mi_page_t* next = to->first->next; + page->prev = to->first; + page->next = next; + to->first->next 
= page; + if (next != NULL) { + next->prev = page; + } + else { + to->last = page; + } + } + else { + // enqueue at the head (singleton list) + page->prev = NULL; + page->next = NULL; + to->first = page; + to->last = page; + mi_theap_queue_first_update(theap, to); + } + } + + mi_page_set_in_full(page, mi_page_queue_is_full(to)); +} + +static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) { + mi_page_queue_enqueue_from_ex(to, from, true /* enqueue at the end */, page); +} + +static void mi_page_queue_enqueue_from_full(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) { + // note: we could insert at the front to increase reuse, but it slows down certain benchmarks (like `alloc-test`) + mi_page_queue_enqueue_from_ex(to, from, true /* enqueue at the end of the `to` queue? */, page); +} + +// Only called from `mi_theap_absorb`. +size_t _mi_page_queue_append(mi_theap_t* theap, mi_page_queue_t* pq, mi_page_queue_t* append) { + mi_assert_internal(mi_theap_contains_queue(theap,pq)); + mi_assert_internal(pq->block_size == append->block_size); + + if (append->first==NULL) return 0; + + // set append pages to new theap and count + size_t count = 0; + for (mi_page_t* page = append->first; page != NULL; page = page->next) { + mi_page_set_theap(page, theap); + count++; + } + mi_assert_internal(count == append->count); + + if (pq->last==NULL) { + // take over afresh + mi_assert_internal(pq->first==NULL); + pq->first = append->first; + pq->last = append->last; + mi_theap_queue_first_update(theap, pq); + } + else { + // append to end + mi_assert_internal(pq->last!=NULL); + mi_assert_internal(append->first!=NULL); + pq->last->next = append->first; + append->first->prev = pq->last; + pq->last = append->last; + } + pq->count += append->count; + + return count; +} diff --git a/3rd/mimalloc-3.2.6/src/page.c b/3rd/mimalloc-3.2.6/src/page.c new file mode 100644 index 00000000..2f3f7f48 --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/page.c @@ -0,0 +1,1020 @@ +/*---------------------------------------------------------------------------- +Copyright (c) 2018-2024, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ----------------------------------------------------------- + The core of the allocator. Every segment contains + pages of a certain block size. The main function + exported is `mi_malloc_generic`. 
+----------------------------------------------------------- */ + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" + +/* ----------------------------------------------------------- + Definition of page queues for each block size +----------------------------------------------------------- */ + +#define MI_IN_PAGE_C +#include "page-queue.c" +#undef MI_IN_PAGE_C + + +/* ----------------------------------------------------------- + Page helpers +----------------------------------------------------------- */ + +// Index a block in a page +static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t block_size, size_t i) { + MI_UNUSED(page); + mi_assert_internal(page != NULL); + mi_assert_internal(i <= page->reserved); + return (mi_block_t*)((uint8_t*)page_start + (i * block_size)); +} + +static bool mi_page_extend_free(mi_theap_t* theap, mi_page_t* page); + +#if (MI_DEBUG>=3) +static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { + mi_assert_internal(_mi_ptr_page(page) == page); + size_t count = 0; + while (head != NULL) { + mi_assert_internal((uint8_t*)head - (uint8_t*)page > (ptrdiff_t)MI_LARGE_PAGE_SIZE || page == _mi_ptr_page(head)); + count++; + head = mi_block_next(page, head); + } + return count; +} + +/* +// Start of the page available memory +static inline uint8_t* mi_page_area(const mi_page_t* page) { + return _mi_page_start(_mi_page_segment(page), page, NULL); +} +*/ + +static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) { + size_t psize; + uint8_t* page_area = mi_page_area(page, &psize); + mi_block_t* start = (mi_block_t*)page_area; + mi_block_t* end = (mi_block_t*)(page_area + psize); + while(p != NULL) { + if (p < start || p >= end) return false; + p = mi_block_next(page, p); + } +#if MI_DEBUG>3 // generally too expensive to check this + if (page->free_is_zero) { + const size_t ubsize = mi_page_usable_block_size(page); + for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page, block)) { + mi_assert_expensive(mi_mem_is_zero(block + 1, ubsize - sizeof(mi_block_t))); + } + } +#endif + return true; +} + +static bool mi_page_is_valid_init(mi_page_t* page) { + mi_assert_internal(mi_page_block_size(page) > 0); + mi_assert_internal(page->used <= page->capacity); + mi_assert_internal(page->capacity <= page->reserved); + + // const size_t bsize = mi_page_block_size(page); + // uint8_t* start = mi_page_start(page); + //mi_assert_internal(start + page->capacity*page->block_size == page->top); + + mi_assert_internal(mi_page_list_is_valid(page,page->free)); + mi_assert_internal(mi_page_list_is_valid(page,page->local_free)); + + #if MI_DEBUG>3 // generally too expensive to check this + if (page->free_is_zero) { + const size_t ubsize = mi_page_usable_block_size(page); + for(mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) { + mi_assert_expensive(mi_mem_is_zero(block + 1, ubsize - sizeof(mi_block_t))); + } + } + #endif + + #if !MI_TRACK_ENABLED && !MI_TSAN + mi_block_t* tfree = mi_page_thread_free(page); + mi_assert_internal(mi_page_list_is_valid(page, tfree)); + //size_t tfree_count = mi_page_list_count(page, tfree); + //mi_assert_internal(tfree_count <= page->thread_freed + 1); + #endif + + size_t free_count = mi_page_list_count(page, page->free) + mi_page_list_count(page, page->local_free); + mi_assert_internal(page->used + free_count == page->capacity); + + return true; +} + +extern mi_decl_hidden bool 
_mi_process_is_initialized; // has mi_process_init been called? + +bool _mi_page_is_valid(mi_page_t* page) { + mi_assert_internal(mi_page_is_valid_init(page)); + #if MI_SECURE + mi_assert_internal(page->keys[0] != 0); + #endif + if (!mi_page_is_abandoned(page)) { + //mi_assert_internal(!_mi_process_is_initialized); + { + mi_page_queue_t* pq = mi_page_queue_of(page); + mi_assert_internal(mi_page_queue_contains(pq, page)); + mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_is_huge(page) || mi_page_is_in_full(page)); + // mi_assert_internal(mi_theap_contains_queue(mi_page_theap(page),pq)); + } + } + return true; +} +#endif + + +/* ----------------------------------------------------------- + Page collect the `local_free` and `thread_free` lists +----------------------------------------------------------- */ + +static void mi_page_thread_collect_to_local(mi_page_t* page, mi_block_t* head) +{ + if (head == NULL) return; + + // find the last block in the list -- also to get a proper use count (without data races) + size_t max_count = page->capacity; // cannot collect more than capacity + size_t count = 1; + mi_block_t* last = head; + mi_block_t* next; + while ((next = mi_block_next(page, last)) != NULL && count <= max_count) { + count++; + last = next; + } + + // if `count > max_count` there was a memory corruption (possibly infinite list due to double multi-threaded free) + if (count > max_count) { + _mi_error_message(EFAULT, "corrupted thread-free list\n"); + return; // the thread-free items cannot be freed + } + + // and append the current local free list + mi_block_set_next(page, last, page->local_free); + page->local_free = head; + + // update counts now + mi_assert_internal(count <= UINT16_MAX); + mi_assert_internal(page->used >= (uint16_t)count); + page->used = page->used - (uint16_t)count; +} + +// Collect the local `thread_free` list using an atomic exchange. +static void mi_page_thread_free_collect(mi_page_t* page) +{ + // atomically capture the thread free list + mi_block_t* head; + mi_thread_free_t tfreex; + mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free); + do { + head = mi_tf_block(tfree); + if mi_likely(head == NULL) return; // return if the list is empty + tfreex = mi_tf_create(NULL,mi_tf_is_owned(tfree)); // set the thread free list to NULL + } while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tfree, tfreex)); // release is enough? + mi_assert_internal(head != NULL); + + // and move it to the local list + mi_page_thread_collect_to_local(page, head); +} + +void _mi_page_free_collect(mi_page_t* page, bool force) { + mi_assert_internal(page!=NULL); + + // collect the thread free list + mi_page_thread_free_collect(page); + + // and the local free list + if (page->local_free != NULL) { + if mi_likely(page->free == NULL) { + // usual case + page->free = page->local_free; + page->local_free = NULL; + page->free_is_zero = false; + } + else if (force) { + // append -- only on shutdown (force) as this is a linear operation + mi_block_t* tail = page->local_free; + mi_block_t* next; + while ((next = mi_block_next(page, tail)) != NULL) { + tail = next; + } + mi_block_set_next(page, tail, page->free); + page->free = page->local_free; + page->local_free = NULL; + page->free_is_zero = false; + } + } + + mi_assert_internal(!force || page->local_free == NULL); +} + +// Collect elements in the thread-free list starting at `head`. 
This is an optimized +// version of `_mi_page_free_collect` to be used from `free.c:_mi_free_collect_mt` that avoids atomic access to `xthread_free`. +// +// `head` must be in the `xthread_free` list. It will not collect `head` itself +// so the `used` count is not fully updated in general. However, if the `head` is +// the last remaining element, it will be collected and the used count will become `0` (so `mi_page_all_free` becomes true). +void _mi_page_free_collect_partly(mi_page_t* page, mi_block_t* head) { + if (head == NULL) return; + mi_block_t* next = mi_block_next(page,head); // we cannot collect the head element itself as `page->thread_free` may point to it (and we want to avoid atomic ops) + if (next != NULL) { + mi_block_set_next(page, head, NULL); + mi_page_thread_collect_to_local(page, next); + if (page->local_free != NULL && page->free == NULL) { + page->free = page->local_free; + page->local_free = NULL; + page->free_is_zero = false; + } + } + if (page->used == 1) { + // all elements are free'd since we skipped the `head` element itself + mi_assert_internal(mi_tf_block(mi_atomic_load_relaxed(&page->xthread_free)) == head); + mi_assert_internal(mi_block_next(page,head) == NULL); + _mi_page_free_collect(page, false); // collect the final element + } +} + + +/* ----------------------------------------------------------- + Page fresh and retire +----------------------------------------------------------- */ + +/* +// called from segments when reclaiming abandoned pages +void _mi_page_reclaim(mi_theap_t* theap, mi_page_t* page) { + // mi_page_set_theap(page, theap); + // _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after theap is set) + _mi_page_free_collect(page, false); // ensure used count is up to date + + mi_assert_expensive(mi_page_is_valid_init(page)); + // mi_assert_internal(mi_page_theap(page) == theap); + // mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE); + + // TODO: push on full queue immediately if it is full? 
+ mi_page_queue_t* pq = mi_theap_page_queue_of(theap, page); + mi_page_queue_push(theap, pq, page); + mi_assert_expensive(_mi_page_is_valid(page)); +} +*/ + +// called from `mi_free` on a reclaim, and fresh_alloc if we get an abandoned page +void _mi_theap_page_reclaim(mi_theap_t* theap, mi_page_t* page) +{ + mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); + mi_assert_internal(_mi_ptr_page(page)==page); + mi_assert_internal(mi_page_is_owned(page)); + mi_assert_internal(mi_page_is_abandoned(page)); + + mi_page_set_theap(page,theap); + _mi_page_free_collect(page, false); // ensure used count is up to date + mi_page_queue_t* pq = mi_theap_page_queue_of(theap, page); + mi_page_queue_push_at_end(theap, pq, page); + mi_assert_expensive(_mi_page_is_valid(page)); +} + +void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { + _mi_page_free_collect(page, false); // ensure used count is up to date + if (mi_page_all_free(page)) { + _mi_page_free(page, pq); + } + else { + mi_page_queue_remove(pq, page); + mi_theap_t* theap = page->theap; + mi_page_set_theap(page, NULL); + page->theap = theap; // don't actually set theap to NULL so we can reclaim_on_free within the same theap + _mi_arenas_page_abandon(page, theap); + _mi_arenas_collect(false, false, theap->tld); // allow purging + } +} + + +// allocate a fresh page from an arena +static mi_page_t* mi_page_fresh_alloc(mi_theap_t* theap, mi_page_queue_t* pq, size_t block_size, size_t page_alignment) { + #if !MI_HUGE_PAGE_ABANDON + mi_assert_internal(pq != NULL); + mi_assert_internal(mi_theap_contains_queue(theap, pq)); + mi_assert_internal(page_alignment > 0 || block_size > MI_LARGE_MAX_OBJ_SIZE || block_size == pq->block_size); + #endif + mi_page_t* page = _mi_arenas_page_alloc(theap, block_size, page_alignment); + if (page == NULL) { + // out-of-memory + return NULL; + } + if (mi_page_is_abandoned(page)) { + _mi_theap_page_reclaim(theap, page); + if (!mi_page_immediate_available(page)) { + if (mi_page_is_expandable(page)) { + if (!mi_page_extend_free(theap, page)) { + return NULL; // cannot commit + }; + } + else { + mi_assert(false); // should not happen? 
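+        // [Editorial note, not part of upstream: defensive bail-out; a freshly
+        //  reclaimed page is expected to have either immediately available blocks
+        //  or unused reserved capacity to extend into.]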
+ return NULL; + } + } + } + else if (pq != NULL) { + mi_page_queue_push(theap, pq, page); + } + mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size); + mi_assert_expensive(_mi_page_is_valid(page)); + return page; +} + +// Get a fresh page to use +static mi_page_t* mi_page_fresh(mi_theap_t* theap, mi_page_queue_t* pq) { + mi_assert_internal(mi_theap_contains_queue(theap, pq)); + mi_page_t* page = mi_page_fresh_alloc(theap, pq, pq->block_size, 0); + if (page==NULL) return NULL; + mi_assert_internal(pq->block_size==mi_page_block_size(page)); + mi_assert_internal(pq==mi_theap_page_queue_of(theap, page)); + return page; +} + + +/* ----------------------------------------------------------- + Unfull, abandon, free and retire +----------------------------------------------------------- */ + +// Move a page from the full list back to a regular list (called from thread-local mi_free) +void _mi_page_unfull(mi_page_t* page) { + mi_assert_internal(page != NULL); + mi_assert_expensive(_mi_page_is_valid(page)); + mi_assert_internal(mi_page_is_in_full(page)); + mi_assert_internal(!mi_page_theap(page)->allow_page_abandon); + if (!mi_page_is_in_full(page)) return; + + mi_theap_t* theap = mi_page_theap(page); + mi_page_queue_t* pqfull = &theap->pages[MI_BIN_FULL]; + mi_page_set_in_full(page, false); // to get the right queue + mi_page_queue_t* pq = mi_theap_page_queue_of(theap, page); + mi_page_set_in_full(page, true); + mi_page_queue_enqueue_from_full(pq, pqfull, page); +} + +static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) { + mi_assert_internal(pq == mi_page_queue_of(page)); + mi_assert_internal(!mi_page_immediate_available(page)); + mi_assert_internal(!mi_page_is_in_full(page)); + + mi_theap_t* theap = mi_page_theap(page); + if (theap->allow_page_abandon) { + // abandon full pages (this is the usual case in order to allow for sharing of memory between theaps) + _mi_page_abandon(page, pq); + } + else if (!mi_page_is_in_full(page)) { + // put full pages in a theap local queue (this is for theaps that cannot abandon, for example, if the theap can be destroyed) + mi_page_queue_enqueue_from(&mi_page_theap(page)->pages[MI_BIN_FULL], pq, page); + _mi_page_free_collect(page, false); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set + } +} + + +// Free a page with no more free blocks +void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq) { + mi_assert_internal(page != NULL); + mi_assert_expensive(_mi_page_is_valid(page)); + mi_assert_internal(pq == mi_page_queue_of(page)); + mi_assert_internal(mi_page_all_free(page)); + // mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING); + + // no more aligned blocks in here + mi_page_set_has_interior_pointers(page, false); + + // remove from the page list + // (no need to do _mi_theap_delayed_free first as all blocks are already free) + mi_page_queue_remove(pq, page); + + // and free it + mi_theap_t* theap = mi_page_theap(page); mi_assert_internal(theap!=NULL); + mi_page_set_theap(page,NULL); + _mi_arenas_page_free(page, theap); + _mi_arenas_collect(false, false, theap->tld); // allow purging +} + +#define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX // should be less than size for MI_BIN_HUGE +#define MI_RETIRE_CYCLES (16) + +// Retire a page with no more used blocks +// Important to not retire too quickly though as new +// allocations might coming. 
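+// [Editorial note, not part of upstream: a retired page is not freed right away;
+//  `retire_expire` is set to MI_RETIRE_CYCLES (or MI_RETIRE_CYCLES/4 for blocks
+//  larger than MI_SMALL_MAX_OBJ_SIZE) and `_mi_theap_collect_retired` frees the
+//  page only once that countdown reaches zero (or on a forced collect) while the
+//  page remains fully free.]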
+// Note: called from `mi_free` and benchmarks often +// trigger this due to freeing everything and then +// allocating again so careful when changing this. +void _mi_page_retire(mi_page_t* page) mi_attr_noexcept { + mi_assert_internal(page != NULL); + mi_assert_expensive(_mi_page_is_valid(page)); + mi_assert_internal(mi_page_all_free(page)); + + mi_page_set_has_interior_pointers(page, false); + + // don't retire too often.. + // (or we end up retiring and re-allocating most of the time) + // NOTE: refine this more: we should not retire if this + // is the only page left with free blocks. It is not clear + // how to check this efficiently though... + // for now, we don't retire if it is the only page left of this size class. + mi_page_queue_t* pq = mi_page_queue_of(page); + #if MI_RETIRE_CYCLES > 0 + const size_t bsize = mi_page_block_size(page); + if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) { // not full or huge queue? + if (pq->last==page && pq->first==page) { // the only page in the queue? + mi_theap_t* theap = mi_page_theap(page); + #if MI_STAT>0 + mi_theap_stat_counter_increase(theap, pages_retire, 1); + #endif + page->retire_expire = (bsize <= MI_SMALL_MAX_OBJ_SIZE ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); + mi_assert_internal(pq >= theap->pages); + const size_t index = pq - theap->pages; + mi_assert_internal(index < MI_BIN_FULL && index < MI_BIN_HUGE); + if (index < theap->page_retired_min) theap->page_retired_min = index; + if (index > theap->page_retired_max) theap->page_retired_max = index; + mi_assert_internal(mi_page_all_free(page)); + return; // don't free after all + } + } + #endif + _mi_page_free(page, pq); +} + +// free retired pages: we don't need to look at the entire queues +// since we only retire pages that are at the head position in a queue. +void _mi_theap_collect_retired(mi_theap_t* theap, bool force) { + size_t min = MI_BIN_FULL; + size_t max = 0; + for(size_t bin = theap->page_retired_min; bin <= theap->page_retired_max; bin++) { + mi_page_queue_t* pq = &theap->pages[bin]; + mi_page_t* page = pq->first; + if (page != NULL && page->retire_expire != 0) { + if (mi_page_all_free(page)) { + page->retire_expire--; + if (force || page->retire_expire == 0) { + _mi_page_free(pq->first, pq); + } + else { + // keep retired, update min/max + if (bin < min) min = bin; + if (bin > max) max = bin; + } + } + else { + page->retire_expire = 0; + } + } + } + theap->page_retired_min = min; + theap->page_retired_max = max; +} + +/* +static void mi_theap_collect_full_pages(mi_theap_t* theap) { + // note: normally full pages get immediately abandoned and the full queue is always empty + // this path is only used if abandoning is disabled due to a destroy-able theap or options + // set by the user. + mi_page_queue_t* pq = &theap->pages[MI_BIN_FULL]; + for (mi_page_t* page = pq->first; page != NULL; ) { + mi_page_t* next = page->next; // get next in case we free the page + _mi_page_free_collect(page, false); // register concurrent free's + // no longer full? + if (!mi_page_is_full(page)) { + if (mi_page_all_free(page)) { + _mi_page_free(page, pq); + } + else { + _mi_page_unfull(page); + } + } + page = next; + } +} +*/ + + +/* ----------------------------------------------------------- + Initialize the initial free list in a page. + In secure mode we initialize a randomized list by + alternating between slices. 
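+  (Editorial note, not part of upstream: MI_MAX_SLICES (64) bounds the number of
+  interleaved slices; the random walk in mi_page_free_list_extend_secure threads
+  the free list through them so the resulting freelist order is hard to predict.)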
+----------------------------------------------------------- */ + +#define MI_MAX_SLICE_SHIFT (6) // at most 64 slices +#define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT) +#define MI_MIN_SLICES (2) + +static void mi_page_free_list_extend_secure(mi_theap_t* const theap, mi_page_t* const page, const size_t bsize, const size_t extend) { + #if (MI_SECURE<3) + mi_assert_internal(page->free == NULL); + mi_assert_internal(page->local_free == NULL); + #endif + mi_assert_internal(page->capacity + extend <= page->reserved); + mi_assert_internal(bsize == mi_page_block_size(page)); + void* const page_area = mi_page_start(page); + + // initialize a randomized free list + // set up `slice_count` slices to alternate between + size_t shift = MI_MAX_SLICE_SHIFT; + while ((extend >> shift) == 0) { + shift--; + } + const size_t slice_count = (size_t)1U << shift; + const size_t slice_extend = extend / slice_count; + mi_assert_internal(slice_extend >= 1); + mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice + size_t counts[MI_MAX_SLICES]; // available objects in the slice + for (size_t i = 0; i < slice_count; i++) { + blocks[i] = mi_page_block_at(page, page_area, bsize, page->capacity + i*slice_extend); + counts[i] = slice_extend; + } + counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?) + + // and initialize the free list by randomly threading through them + // set up first element + const uintptr_t r = _mi_theap_random_next(theap); + size_t current = r % slice_count; + counts[current]--; + mi_block_t* const free_start = blocks[current]; + // and iterate through the rest; use `random_shuffle` for performance + uintptr_t rnd = _mi_random_shuffle(r|1); // ensure not 0 + for (size_t i = 1; i < extend; i++) { + // call random_shuffle only every INTPTR_SIZE rounds + const size_t round = i%MI_INTPTR_SIZE; + if (round == 0) rnd = _mi_random_shuffle(rnd); + // select a random next slice index + size_t next = ((rnd >> 8*round) & (slice_count-1)); + while (counts[next]==0) { // ensure it still has space + next++; + if (next==slice_count) next = 0; + } + // and link the current block to it + counts[next]--; + mi_block_t* const block = blocks[current]; + blocks[current] = (mi_block_t*)((uint8_t*)block + bsize); // bump to the following block + mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next` + current = next; + } + // prepend to the free list (usually NULL) + mi_block_set_next(page, blocks[current], page->free); // end of the list + page->free = free_start; +} + +static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t bsize, const size_t extend) +{ + #if (MI_SECURE<3) + mi_assert_internal(page->free == NULL); + mi_assert_internal(page->local_free == NULL); + #endif + mi_assert_internal(page->capacity + extend <= page->reserved); + mi_assert_internal(bsize == mi_page_block_size(page)); + void* const page_area = mi_page_start(page); + + mi_block_t* const start = mi_page_block_at(page, page_area, bsize, page->capacity); + + // initialize a sequential free list + mi_block_t* const last = mi_page_block_at(page, page_area, bsize, page->capacity + extend - 1); + mi_block_t* block = start; + while(block <= last) { + mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize); + mi_block_set_next(page,block,next); + block = next; + } + // prepend to free list (usually `NULL`) + mi_block_set_next(page, last, page->free); + page->free = start; +} + +/* 
----------------------------------------------------------- + Page initialize and extend the capacity +----------------------------------------------------------- */ + +#define MI_MAX_EXTEND_SIZE (4*1024) // heuristic, one OS page seems to work well. +#if (MI_SECURE>=3) +#define MI_MIN_EXTEND (8*MI_SECURE) // extend at least by this many +#else +#define MI_MIN_EXTEND (1) +#endif + +// Extend the capacity (up to reserved) by initializing a free list +// We do at most `MI_MAX_EXTEND` to avoid touching too much memory +// Note: we also experimented with "bump" allocation on the first +// allocations but this did not speed up any benchmark (due to an +// extra test in malloc? or cache effects?) +mi_decl_nodiscard bool static mi_page_extend_free(mi_theap_t* theap, mi_page_t* page) { + mi_assert_expensive(mi_page_is_valid_init(page)); + #if (MI_SECURE<3) + mi_assert(page->free == NULL); + mi_assert(page->local_free == NULL); + if (page->free != NULL) return true; + #endif + if (page->capacity >= page->reserved) return true; + + size_t page_size; + //uint8_t* page_start = + mi_page_area(page, &page_size); + #if MI_STAT>0 + mi_theap_stat_counter_increase(theap, pages_extended, 1); + #endif + + // calculate the extend count + const size_t bsize = mi_page_block_size(page); + size_t extend = (size_t)page->reserved - page->capacity; + mi_assert_internal(extend > 0); + + size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/bsize); + if (max_extend < MI_MIN_EXTEND) { max_extend = MI_MIN_EXTEND; } + mi_assert_internal(max_extend > 0); + + if (extend > max_extend) { + // ensure we don't touch memory beyond the page to reduce page commit. + // the `lean` benchmark tests this. Going from 1 to 8 increases rss by 50%. + extend = max_extend; + } + + mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved); + mi_assert_internal(extend < (1UL<<16)); + + // commit on demand? + if (page->slice_committed > 0) { + const size_t needed_size = (page->capacity + extend)*bsize; + const size_t needed_commit = _mi_align_up( mi_page_slice_offset_of(page, needed_size), MI_PAGE_MIN_COMMIT_SIZE ); + if (needed_commit > page->slice_committed) { + mi_assert_internal(((needed_commit - page->slice_committed) % _mi_os_page_size()) == 0); + if (!_mi_os_commit(mi_page_slice_start(page) + page->slice_committed, needed_commit - page->slice_committed, NULL)) { + return false; + } + page->slice_committed = needed_commit; + } + } + + // and append the extend the free list + if (extend < MI_MIN_SLICES || MI_SECURE<3) { //!mi_option_is_enabled(mi_option_secure)) { + mi_page_free_list_extend(page, bsize, extend ); + } + else { + mi_page_free_list_extend_secure(theap, page, bsize, extend); + } + // enable the new free list + page->capacity += (uint16_t)extend; + #if MI_STAT>0 + mi_theap_stat_increase(theap, page_committed, extend * bsize); + #endif + mi_assert_expensive(mi_page_is_valid_init(page)); + return true; +} + +// Initialize a fresh page (that is already partially initialized) +mi_decl_nodiscard bool _mi_page_init(mi_theap_t* theap, mi_page_t* page) { + mi_assert(page != NULL); + mi_assert(theap!=NULL); + page->heap = (_mi_is_heap_main(theap->heap) ? 
NULL : theap->heap); // faster for `mi_page_associated_theap` + mi_page_set_theap(page, theap); + + size_t page_size; + uint8_t* page_start = mi_page_area(page, &page_size); MI_UNUSED(page_start); + mi_track_mem_noaccess(page_start,page_size); + mi_assert_internal(page_size / mi_page_block_size(page) < (1L<<16)); + mi_assert_internal(page->reserved > 0); + #if (MI_PADDING || MI_ENCODE_FREELIST) + page->keys[0] = _mi_theap_random_next(theap); + page->keys[1] = _mi_theap_random_next(theap); + #endif + #if MI_DEBUG>2 + if (page->memid.initially_zero) { + mi_track_mem_defined(page->page_start, mi_page_committed(page)); + mi_assert_expensive(mi_mem_is_zero(page_start, mi_page_committed(page))); + } + #endif + + mi_assert_internal(page->theap!=NULL); + mi_assert_internal(page->theap == mi_page_theap(page)); + mi_assert_internal(page->capacity == 0); + mi_assert_internal(page->free == NULL); + mi_assert_internal(page->used == 0); + mi_assert_internal(mi_page_is_owned(page)); + mi_assert_internal(page->xthread_free == 1); + mi_assert_internal(page->next == NULL); + mi_assert_internal(page->prev == NULL); + mi_assert_internal(page->retire_expire == 0); + mi_assert_internal(!mi_page_has_interior_pointers(page)); + #if (MI_PADDING || MI_ENCODE_FREELIST) + mi_assert_internal(page->keys[0] != 0); + mi_assert_internal(page->keys[1] != 0); + #endif + mi_assert_expensive(mi_page_is_valid_init(page)); + + // initialize an initial free list + if (!mi_page_extend_free(theap,page)) return false; + mi_assert(mi_page_immediate_available(page)); + return true; +} + + +/* ----------------------------------------------------------- + Find pages with free blocks +-------------------------------------------------------------*/ + +// Find a page with free blocks of `page->block_size`. +static mi_decl_noinline mi_page_t* mi_page_queue_find_free_ex(mi_theap_t* theap, mi_page_queue_t* pq, bool first_try) +{ + // search through the pages in "next fit" order + size_t count = 0; + long candidate_limit = 0; // we reset this on the first candidate to limit the search + long page_full_retain = (pq->block_size > MI_SMALL_MAX_OBJ_SIZE ? 0 : theap->page_full_retain); // only retain small pages + mi_page_t* page_candidate = NULL; // a page with free space + mi_page_t* page = pq->first; + + while (page != NULL) + { + mi_page_t* next = page->next; // remember next (as this page can move to another queue) + count++; + candidate_limit--; + + // search up to N pages for a best candidate + + // is the local free list non-empty? + bool immediate_available = mi_page_immediate_available(page); + if (!immediate_available) { + // collect freed blocks by us and other threads to we get a proper use count + _mi_page_free_collect(page, false); + immediate_available = mi_page_immediate_available(page); + } + + // if the page is completely full, move it to the `mi_pages_full` + // queue so we don't visit long-lived pages too often. 
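+    // [Editorial note, not part of upstream: `page_full_retain` bounds how many
+    //  full, non-expandable pages are tolerated during this scan; once it drops
+    //  below zero every further full page encountered is pushed to the full queue.]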
+ if (!immediate_available && !mi_page_is_expandable(page)) { + page_full_retain--; + if (page_full_retain < 0) { + mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page)); + mi_page_to_full(page, pq); + } + } + else { + // the page has free space, make it a candidate + // we prefer non-expandable pages with high usage as candidates (to reduce commit, and increase chances of free-ing up pages) + if (page_candidate == NULL) { + page_candidate = page; + candidate_limit = _mi_option_get_fast(mi_option_page_max_candidates); + } + else if (mi_page_all_free(page_candidate)) { + _mi_page_free(page_candidate, pq); + page_candidate = page; + } + // prefer to reuse fuller pages (in the hope the less used page gets freed) + else if (page->used >= page_candidate->used && !mi_page_is_mostly_used(page)) { // && !mi_page_is_expandable(page)) { + page_candidate = page; + } + // if we find a non-expandable candidate, or searched for N pages, return with the best candidate + if (immediate_available || candidate_limit <= 0) { + mi_assert_internal(page_candidate!=NULL); + break; + } + } + + #if 0 + // first-fit algorithm without candidates + // If the page contains free blocks, we are done + if (mi_page_immediate_available(page) || mi_page_is_expandable(page)) { + break; // pick this one + } + + // If the page is completely full, move it to the `mi_pages_full` + // queue so we don't visit long-lived pages too often. + mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page)); + mi_page_to_full(page, pq); + #endif + + page = next; + } // for each page + + mi_theap_stat_counter_increase(theap, page_searches, count); + mi_theap_stat_counter_increase(theap, page_searches_count, 1); + + // set the page to the best candidate + if (page_candidate != NULL) { + page = page_candidate; + } + if (page != NULL) { + if (!mi_page_immediate_available(page)) { + mi_assert_internal(mi_page_is_expandable(page)); + if (!mi_page_extend_free(theap, page)) { + page = NULL; // failed to extend + } + } + mi_assert_internal(page == NULL || mi_page_immediate_available(page)); + } + + if (page == NULL) { + _mi_theap_collect_retired(theap, false); // perhaps make a page available + page = mi_page_fresh(theap, pq); + mi_assert_internal(page == NULL || mi_page_immediate_available(page)); + if (page == NULL && first_try) { + // out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again + page = mi_page_queue_find_free_ex(theap, pq, false); + mi_assert_internal(page == NULL || mi_page_immediate_available(page)); + } + } + else { + mi_assert_internal(page == NULL || mi_page_immediate_available(page)); + // move the page to the front of the queue + mi_page_queue_move_to_front(theap, pq, page); + page->retire_expire = 0; + // _mi_theap_collect_retired(theap, false); // update retire counts; note: increases rss on MemoryLoad bench so don't do this + } + mi_assert_internal(page == NULL || mi_page_immediate_available(page)); + + + return page; +} + + + +// Find a page with free blocks of `size`. 
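+// This is the fast path: only the first page in the queue is checked; if it has an
+// immediately available block it is returned directly, and otherwise we fall back to
+// the full "next fit" search in `mi_page_queue_find_free_ex`.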
+static mi_page_t* mi_find_free_page(mi_theap_t* theap, mi_page_queue_t* pq) { + // mi_page_queue_t* pq = mi_page_queue(theap, size); + mi_assert_internal(!mi_page_queue_is_huge(pq)); + + // check the first page: we even do this with candidate search or otherwise we re-search every time + mi_page_t* page = pq->first; + if mi_likely(page != NULL && mi_page_immediate_available(page)) { + #if (MI_SECURE>=3) // in secure mode, we extend half the time to increase randomness + if (page->capacity < page->reserved && ((_mi_theap_random_next(theap) & 1) == 1)) { + (void)mi_page_extend_free(theap, page); // ok if this fails + mi_assert_internal(mi_page_immediate_available(page)); + } + #endif + page->retire_expire = 0; + return page; // fast path + } + else { + return mi_page_queue_find_free_ex(theap, pq, true); + } +} + + +/* ----------------------------------------------------------- + Users can register a deferred free function called + when the `free` list is empty. Since the `local_free` + is separate this is deterministically called after + a certain number of allocations. +----------------------------------------------------------- */ + +static mi_deferred_free_fun* volatile deferred_free = NULL; +static _Atomic(void*) deferred_arg; // = NULL + +void _mi_deferred_free(mi_theap_t* theap, bool force) { + theap->heartbeat++; + if (deferred_free != NULL && !theap->tld->recurse) { + theap->tld->recurse = true; + deferred_free(force, theap->heartbeat, mi_atomic_load_ptr_relaxed(void,&deferred_arg)); + theap->tld->recurse = false; + } +} + +void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noexcept { + deferred_free = fn; + mi_atomic_store_ptr_release(void,&deferred_arg, arg); +} + + +/* ----------------------------------------------------------- + General allocation +----------------------------------------------------------- */ + +// Huge pages contain just one block, and the segment contains just that page. +// Huge pages are also use if the requested alignment is very large (> MI_BLOCK_ALIGNMENT_MAX) +// so their size is not always `> MI_LARGE_OBJ_SIZE_MAX`. +static mi_page_t* mi_huge_page_alloc(mi_theap_t* theap, size_t size, size_t page_alignment, mi_page_queue_t* pq) { + const size_t block_size = _mi_os_good_alloc_size(size); + // mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0); + #if MI_HUGE_PAGE_ABANDON + #error todo. + #else + // mi_page_queue_t* pq = mi_page_queue(theap, MI_LARGE_MAX_OBJ_SIZE+1); // always in the huge queue regardless of the block size + mi_assert_internal(mi_page_queue_is_huge(pq)); + #endif + mi_page_t* page = mi_page_fresh_alloc(theap, pq, block_size, page_alignment); + if (page != NULL) { + mi_assert_internal(mi_page_block_size(page) >= size); + mi_assert_internal(mi_page_immediate_available(page)); + mi_assert_internal(mi_page_is_huge(page)); + mi_assert_internal(mi_page_is_singleton(page)); + #if MI_HUGE_PAGE_ABANDON + mi_assert_internal(mi_page_is_abandoned(page)); + mi_page_set_theap(page, NULL); + #endif + mi_theap_stat_increase(theap, malloc_huge, mi_page_block_size(page)); + mi_theap_stat_counter_increase(theap, malloc_huge_count, 1); + } + return page; +} + + +// Allocate a page +// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed. 
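+// First map the request to its size-segregated queue (or to the huge queue when
+// `huge_alignment > 0`), then either allocate a fresh singleton huge page via
+// `mi_huge_page_alloc` or search the queue with `mi_find_free_page`.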
+static mi_page_t* mi_find_page(mi_theap_t* theap, size_t size, size_t huge_alignment) mi_attr_noexcept { + const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` + if mi_unlikely(req_size > MI_MAX_ALLOC_SIZE) { + _mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size); + return NULL; + } + mi_page_queue_t* pq = mi_page_queue(theap, (huge_alignment > 0 ? MI_LARGE_MAX_OBJ_SIZE+1 : size)); + // huge allocation? + if mi_unlikely(mi_page_queue_is_huge(pq) || req_size > MI_MAX_ALLOC_SIZE) { + return mi_huge_page_alloc(theap,size,huge_alignment,pq); + } + else { + // otherwise find a page with free blocks in our size segregated queues + #if MI_PADDING + mi_assert_internal(size >= MI_PADDING_SIZE); + #endif + return mi_find_free_page(theap, pq); + } +} + + +// Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed. +// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed. +// The `huge_alignment` is normally 0 but is set to a multiple of MI_SLICE_SIZE for +// very large requested alignments in which case we use a huge singleton page. +// Note: we put `bool zero, size_t huge_alignment` into one parameter (with zero in the low bit) +// to use 4 parameters which compiles better on msvc for the malloc fast path. +void* _mi_malloc_generic(mi_theap_t* theap, size_t size, size_t zero_huge_alignment, size_t* usable) mi_attr_noexcept +{ + const bool zero = ((zero_huge_alignment & 1) != 0); + const size_t huge_alignment = (zero_huge_alignment & ~1); + + #if !MI_THEAP_INITASNULL + mi_assert_internal(theap != NULL); + #endif + + // initialize if necessary + if mi_unlikely(!mi_theap_is_initialized(theap)) { + if (theap==&_mi_theap_empty_wrong) { + // we were unable to allocate a theap for a first-class heap + return NULL; + } + // otherwise we initialize the thread and its default theap + mi_thread_init(); + theap = _mi_theap_default(); + if mi_unlikely(!mi_theap_is_initialized(theap)) { return NULL; } + mi_assert_internal(_mi_theap_default()==theap); + } + mi_assert_internal(mi_theap_is_initialized(theap)); + + // do administrative tasks every N generic mallocs + if mi_unlikely(++theap->generic_count >= 1000) { + theap->generic_collect_count += theap->generic_count; + theap->generic_count = 0; + // call potential deferred free routines + _mi_deferred_free(theap, false); + + // collect every once in a while (10000 by default) + const long generic_collect = mi_option_get_clamp(mi_option_generic_collect, 1, 1000000L); + if (theap->generic_collect_count >= generic_collect) { + theap->generic_collect_count = 0; + mi_theap_collect(theap, false /* force? */); + } + } + + // find (or allocate) a page of the right size + mi_page_t* page = mi_find_page(theap, size, huge_alignment); + if mi_unlikely(page == NULL) { // first time out of memory, try to collect and retry the allocation once more + mi_theap_collect(theap, true /* force? 
*/); + page = mi_find_page(theap, size, huge_alignment); + } + + if mi_unlikely(page == NULL) { // out of memory + const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` + _mi_error_message(ENOMEM, "unable to allocate memory (%zu bytes)\n", req_size); + return NULL; + } + + mi_assert_internal(mi_page_immediate_available(page)); + mi_assert_internal(mi_page_block_size(page) >= size); + mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); + mi_assert_internal(_mi_ptr_page(page)==page); + + // and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc) + if (usable!=NULL) { *usable = mi_page_usable_block_size(page); } + void* p; + if mi_likely(!zero) { + p = _mi_page_malloc(theap, page, size); + } + else { + p = _mi_page_malloc_zeroed(theap, page, size); + } + mi_assert_internal(p != NULL); + + // move full pages to the full queue + if (mi_page_is_full(page)) { + mi_page_to_full(page, mi_page_queue_of(page)); + } + return p; +} diff --git a/3rd/mimalloc-3.2.6/src/prim/emscripten/prim.c b/3rd/mimalloc-3.2.6/src/prim/emscripten/prim.c new file mode 100644 index 00000000..ab3d59ee --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/prim/emscripten/prim.c @@ -0,0 +1,252 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen, Alon Zakai +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// This file is included in `src/prim/prim.c` + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" + +// Design +// ====== +// +// mimalloc is built on top of emmalloc. emmalloc is a minimal allocator on top +// of sbrk. The reason for having three layers here is that we want mimalloc to +// be able to allocate and release system memory properly, the same way it would +// when using VirtualAlloc on Windows or mmap on POSIX, and sbrk is too limited. +// Specifically, sbrk can only go up and down, and not "skip" over regions, and +// so we end up either never freeing memory to the system, or we can get stuck +// with holes. +// +// Atm wasm generally does *not* free memory back the system: once grown, we do +// not shrink back down (https://github.com/WebAssembly/design/issues/1397). +// However, that is expected to improve +// (https://github.com/WebAssembly/memory-control/blob/main/proposals/memory-control/Overview.md) +// and so we do not want to bake those limitations in here. +// +// Even without that issue, we want our system allocator to handle holes, that +// is, it should merge freed regions and allow allocating new content there of +// the full size, etc., so that we do not waste space. That means that the +// system allocator really does need to handle the general problem of allocating +// and freeing variable-sized chunks of memory in a random order, like malloc/ +// free do. And so it makes sense to layer mimalloc on top of such an +// implementation. +// +// emmalloc makes sense for the lower level because it is small and simple while +// still fully handling merging of holes etc. It is not the most efficient +// allocator, but our assumption is that mimalloc needs to be fast while the +// system allocator underneath it is called much less frequently. 
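+// An informal sketch of the resulting layering:
+//
+//    application malloc/free
+//          |
+//    mimalloc      (theaps, pages, free lists)
+//          |  _mi_prim_alloc / _mi_prim_free   (this file)
+//    emmalloc      (merges holes, variable-sized chunks)
+//          |  sbrk
+//    wasm linear memory (memory.grow)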
+// + +//--------------------------------------------- +// init +//--------------------------------------------- + +void _mi_prim_mem_init( mi_os_mem_config_t* config) { + config->page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB + config->alloc_granularity = 16; + config->has_overcommit = false; + config->has_partial_free = false; + config->has_virtual_reserve = false; +} + +extern void emmalloc_free(void*); + +int _mi_prim_free(void* addr, size_t size) { + if (size==0) return 0; + emmalloc_free(addr); + return 0; +} + + +//--------------------------------------------- +// Allocation +//--------------------------------------------- + +extern void* emmalloc_memalign(size_t alignment, size_t size); + +// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. +int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) { + MI_UNUSED(try_alignment); MI_UNUSED(allow_large); MI_UNUSED(commit); MI_UNUSED(hint_addr); + *is_large = false; + // TODO: Track the highest address ever seen; first uses of it are zeroes. + // That assumes no one else uses sbrk but us (they could go up, + // scribble, and then down), but we could assert on that perhaps. + *is_zero = false; + // emmalloc has a minimum alignment size. + #define MIN_EMMALLOC_ALIGN 8 + if (try_alignment < MIN_EMMALLOC_ALIGN) { + try_alignment = MIN_EMMALLOC_ALIGN; + } + void* p = emmalloc_memalign(try_alignment, size); + *addr = p; + if (p == 0) { + return ENOMEM; + } + return 0; +} + + +//--------------------------------------------- +// Commit/Reset +//--------------------------------------------- + +int _mi_prim_commit(void* addr, size_t size, bool* is_zero) { + MI_UNUSED(addr); MI_UNUSED(size); + // See TODO above. 
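+  // Commit is effectively a no-op on this backend: memory handed out by emmalloc is
+  // already accessible, so we only report (conservatively) that it may not be zero.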
+ *is_zero = false; + return 0; +} + +int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) { + MI_UNUSED(addr); MI_UNUSED(size); + *needs_recommit = false; + return 0; +} + +int _mi_prim_reset(void* addr, size_t size) { + MI_UNUSED(addr); MI_UNUSED(size); + return 0; +} + +int _mi_prim_reuse(void* addr, size_t size) { + MI_UNUSED(addr); MI_UNUSED(size); + return 0; +} + +int _mi_prim_protect(void* addr, size_t size, bool protect) { + MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(protect); + return 0; +} + + +//--------------------------------------------- +// Huge pages and NUMA nodes +//--------------------------------------------- + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { + MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); + *is_zero = true; + *addr = NULL; + return ENOSYS; +} + +size_t _mi_prim_numa_node(void) { + return 0; +} + +size_t _mi_prim_numa_node_count(void) { + return 1; +} + + +//---------------------------------------------------------------- +// Clock +//---------------------------------------------------------------- + +#include + +mi_msecs_t _mi_prim_clock_now(void) { + return emscripten_date_now(); +} + + +//---------------------------------------------------------------- +// Process info +//---------------------------------------------------------------- + +void _mi_prim_process_info(mi_process_info_t* pinfo) +{ + // use defaults + MI_UNUSED(pinfo); +} + + +//---------------------------------------------------------------- +// Output +//---------------------------------------------------------------- + +#include + +void _mi_prim_out_stderr( const char* msg) { + emscripten_console_error(msg); +} + + +//---------------------------------------------------------------- +// Environment +//---------------------------------------------------------------- + +bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { + // For code size reasons, do not support environ customization for now. 
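+  // As a consequence, MIMALLOC_ environment options are ignored on this backend;
+  // options can still be set programmatically, e.g. `mi_option_set(mi_option_show_stats, 1)`.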
+ MI_UNUSED(name); + MI_UNUSED(result); + MI_UNUSED(result_size); + return false; +} + + +//---------------------------------------------------------------- +// Random +//---------------------------------------------------------------- + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + int err = getentropy(buf, buf_len); + return !err; +} + + +//---------------------------------------------------------------- +// Thread init/done +//---------------------------------------------------------------- + +#if defined(MI_USE_PTHREADS) + +// use pthread local storage keys to detect thread ending +// (and used with MI_TLS_PTHREADS for the default theap) +pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1); + +static void mi_pthread_done(void* value) { + if (value!=NULL) { + _mi_thread_done((mi_theap_t*)value); + } +} + +void _mi_prim_thread_init_auto_done(void) { + mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1)); + pthread_key_create(&_mi_heap_default_key, &mi_pthread_done); +} + +void _mi_prim_thread_done_auto_done(void) { + // nothing to do +} + +void _mi_prim_thread_associate_default_theap(mi_theap_t* theap) { + if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD + pthread_setspecific(_mi_heap_default_key, theap); + } +} + +#else + +void _mi_prim_thread_init_auto_done(void) { + // nothing +} + +void _mi_prim_thread_done_auto_done(void) { + // nothing +} + +void _mi_prim_thread_associate_default_theap(mi_theap_t* theap) { + MI_UNUSED(theap); +} +#endif + +bool _mi_prim_thread_is_in_threadpool(void) { + return false; +} diff --git a/3rd/mimalloc-2.0.9/src/alloc-override-osx.c b/3rd/mimalloc-3.2.6/src/prim/osx/alloc-override-zone.c similarity index 95% rename from 3rd/mimalloc-2.0.9/src/alloc-override-osx.c rename to 3rd/mimalloc-3.2.6/src/prim/osx/alloc-override-zone.c index a2819a8b..aa971c39 100644 --- a/3rd/mimalloc-2.0.9/src/alloc-override-osx.c +++ b/3rd/mimalloc-3.2.6/src/prim/osx/alloc-override-zone.c @@ -6,7 +6,7 @@ terms of the MIT license. 
A copy of the license can be found in the file -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" #if defined(MI_MALLOC_OVERRIDE) @@ -64,7 +64,8 @@ static void* zone_valloc(malloc_zone_t* zone, size_t size) { static void zone_free(malloc_zone_t* zone, void* p) { MI_UNUSED(zone); - mi_cfree(p); + // mi_cfree(p); // checked free as `zone_free` may be called with invalid pointers + mi_free(p); // with the page_map and pagemap_commit=1 we can use the regular free } static void* zone_realloc(malloc_zone_t* zone, void* p, size_t newsize) { @@ -83,7 +84,7 @@ static void zone_destroy(malloc_zone_t* zone) { } static unsigned zone_batch_malloc(malloc_zone_t* zone, size_t size, void** ps, unsigned count) { - size_t i; + unsigned i; for (i = 0; i < count; i++) { ps[i] = zone_malloc(zone, size); if (ps[i] == NULL) break; @@ -195,7 +196,7 @@ static malloc_introspection_t mi_introspect = { .log = &intro_log, .force_lock = &intro_force_lock, .force_unlock = &intro_force_unlock, -#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) +#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) && !defined(__ppc__) .statistics = &intro_statistics, .zone_locked = &intro_zone_locked, #endif @@ -216,7 +217,7 @@ static malloc_zone_t mi_malloc_zone = { .batch_malloc = &zone_batch_malloc, .batch_free = &zone_batch_free, .introspect = &mi_introspect, -#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) +#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) && !defined(__ppc__) #if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14) .version = 10, #else @@ -225,7 +226,9 @@ static malloc_zone_t mi_malloc_zone = { // switch to version 9+ on OSX 10.6 to support memalign. .memalign = &zone_memalign, .free_definite_size = &zone_free_definite_size, + #if defined(MAC_OS_X_VERSION_10_7) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7) .pressure_relief = &zone_pressure_relief, + #endif #if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14) .claimed_address = &zone_claimed_address, #endif @@ -416,11 +419,12 @@ static inline malloc_zone_t* mi_get_default_zone(void) } #if defined(__clang__) -__attribute__((constructor(0))) +__attribute__((constructor(101))) // highest priority #else -__attribute__((constructor)) // seems not supported by g++-11 on the M1 +__attribute__((constructor)) // priority level is not supported by gcc #endif -static void _mi_macos_override_malloc() { +__attribute__((used)) +static void _mi_macos_override_malloc(void) { malloc_zone_t* purgeable_zone = NULL; #if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) diff --git a/3rd/mimalloc-3.2.6/src/prim/osx/prim.c b/3rd/mimalloc-3.2.6/src/prim/osx/prim.c new file mode 100644 index 00000000..8a2f4e8a --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/prim/osx/prim.c @@ -0,0 +1,9 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. 
+-----------------------------------------------------------------------------*/ + +// We use the unix/prim.c with the mmap API on macOSX +#include "../unix/prim.c" diff --git a/3rd/mimalloc-3.2.6/src/prim/prim.c b/3rd/mimalloc-3.2.6/src/prim/prim.c new file mode 100644 index 00000000..5147bae8 --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/prim/prim.c @@ -0,0 +1,76 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// Select the implementation of the primitives +// depending on the OS. + +#if defined(_WIN32) +#include "windows/prim.c" // VirtualAlloc (Windows) + +#elif defined(__APPLE__) +#include "osx/prim.c" // macOSX (actually defers to mmap in unix/prim.c) + +#elif defined(__wasi__) +#define MI_USE_SBRK +#include "wasi/prim.c" // memory-grow or sbrk (Wasm) + +#elif defined(__EMSCRIPTEN__) +#include "emscripten/prim.c" // emmalloc_*, + pthread support + +#else +#include "unix/prim.c" // mmap() (Linux, macOSX, BSD, Illumnos, Haiku, DragonFly, etc.) + +#endif + +// Generic process initialization +#ifndef MI_PRIM_HAS_PROCESS_ATTACH +#if defined(__GNUC__) || defined(__clang__) + // gcc,clang: use the constructor/destructor attribute + // which for both seem to run before regular constructors/destructors + #if defined(__clang__) + #define mi_attr_constructor __attribute__((constructor(101))) + #define mi_attr_destructor __attribute__((destructor(101))) + #else + #define mi_attr_constructor __attribute__((constructor)) + #define mi_attr_destructor __attribute__((destructor)) + #endif + static void mi_attr_constructor mi_process_attach(void) { + _mi_auto_process_init(); + } + static void mi_attr_destructor mi_process_detach(void) { + _mi_auto_process_done(); + } +#elif defined(__cplusplus) + // C++: use static initialization to detect process start/end + // This is not guaranteed to be first/last but the best we can generally do? + struct mi_init_done_t { + mi_init_done_t() { + _mi_auto_process_init(); + } + ~mi_init_done_t() { + _mi_auto_process_done(); + } + }; + static mi_init_done_t mi_init_done; + #else + #pragma message("define a way to call _mi_auto_process_init/done on your platform") +#endif +#endif + +// Generic allocator init/done callback +#ifndef MI_PRIM_HAS_ALLOCATOR_INIT +bool _mi_is_redirected(void) { + return false; +} +bool _mi_allocator_init(const char** message) { + if (message != NULL) { *message = NULL; } + return true; +} +void _mi_allocator_done(void) { + // nothing to do +} +#endif diff --git a/3rd/mimalloc-3.2.6/src/prim/readme.md b/3rd/mimalloc-3.2.6/src/prim/readme.md new file mode 100644 index 00000000..380dd3a7 --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/prim/readme.md @@ -0,0 +1,9 @@ +## Portability Primitives + +This is the portability layer where all primitives needed from the OS are defined. + +- `include/mimalloc/prim.h`: primitive portability API definition. +- `prim.c`: Selects one of `unix/prim.c`, `wasi/prim.c`, or `windows/prim.c` depending on the host platform + (and on macOS, `osx/prim.c` defers to `unix/prim.c`). + +Note: still work in progress, there may still be places in the sources that still depend on OS ifdef's. 
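+
+As a rough sketch (simplified; see `include/mimalloc/prim.h` for the authoritative
+declarations), every backend implements the same small set of primitives, for example:
+
+    int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment,
+                       bool commit, bool allow_large,
+                       bool* is_large, bool* is_zero, void** addr);
+    int _mi_prim_free(void* addr, size_t size);
+    int _mi_prim_commit(void* addr, size_t size, bool* is_zero);
+    int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit);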
\ No newline at end of file diff --git a/3rd/mimalloc-3.2.6/src/prim/unix/prim.c b/3rd/mimalloc-3.2.6/src/prim/unix/prim.c new file mode 100644 index 00000000..ba8dfb1f --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/prim/unix/prim.c @@ -0,0 +1,991 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// This file is included in `src/prim/prim.c` + +#ifndef _DEFAULT_SOURCE +#define _DEFAULT_SOURCE // ensure mmap flags and syscall are defined +#endif + +#if defined(__sun) +// illumos provides new mman.h api when any of these are defined +// otherwise the old api based on caddr_t which predates the void pointers one. +// stock solaris provides only the former, chose to atomically to discard those +// flags only here rather than project wide tough. +#undef _XOPEN_SOURCE +#undef _POSIX_C_SOURCE +#endif + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" + +#include // mmap +#include // sysconf +#include // open, close, read, access +#include // getenv, arc4random_buf + +#if defined(__linux__) + #include + #include // THP disable, PR_SET_VMA + #include // sysinfo + #if defined(__GLIBC__) && !defined(PR_SET_VMA) + #include + #endif + #if defined(__GLIBC__) + #include // linux mmap flags + #else + #include + #endif +#elif defined(__APPLE__) + #include + #include + #if !defined(TARGET_OS_OSX) || TARGET_OS_OSX // see issue #879, used to be (!TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR) + #include // VM_MAKE_TAG, VM_FLAGS_SUPERPAGE_SIZE_2MB, etc. + #endif + #if !defined(MAC_OS_X_VERSION_10_7) + #define MAC_OS_X_VERSION_10_7 1070 + #endif + #include +#elif defined(__FreeBSD__) || defined(__DragonFly__) + #include + #if __FreeBSD_version >= 1200000 + #include + #include + #endif + #include +#endif + +#if (defined(__linux__) && !defined(__ANDROID__)) || defined(__FreeBSD__) + #define MI_HAS_SYSCALL_H + #include +#endif + +#if !defined(MADV_DONTNEED) && defined(POSIX_MADV_DONTNEED) // QNX +#define MADV_DONTNEED POSIX_MADV_DONTNEED +#endif +#if !defined(MADV_FREE) && defined(POSIX_MADV_FREE) // QNX +#define MADV_FREE POSIX_MADV_FREE +#endif + +#define MI_UNIX_LARGE_PAGE_SIZE (2*MI_MiB) // TODO: can we query the OS for this? + +//------------------------------------------------------------------------------------ +// Use syscalls for some primitives to allow for libraries that override open/read/close etc. +// and do allocation themselves; using syscalls prevents recursion when mimalloc is +// still initializing (issue #713) +// Declare inline to avoid unused function warnings. 
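+// When the needed syscall numbers are not available, the wrappers below simply fall
+// back to the regular libc calls (open/read/close/access).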
+//------------------------------------------------------------------------------------ + +#if defined(MI_HAS_SYSCALL_H) && defined(SYS_open) && defined(SYS_close) && defined(SYS_read) && defined(SYS_access) + +static inline int mi_prim_open(const char* fpath, int open_flags) { + return syscall(SYS_open,fpath,open_flags,0); +} +static inline ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) { + return syscall(SYS_read,fd,buf,bufsize); +} +static inline int mi_prim_close(int fd) { + return syscall(SYS_close,fd); +} +static inline int mi_prim_access(const char *fpath, int mode) { + return syscall(SYS_access,fpath,mode); +} + +#else + +static inline int mi_prim_open(const char* fpath, int open_flags) { + return open(fpath,open_flags); +} +static inline ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) { + return read(fd,buf,bufsize); +} +static inline int mi_prim_close(int fd) { + return close(fd); +} +static inline int mi_prim_access(const char *fpath, int mode) { + return access(fpath,mode); +} + +#endif + + + +//--------------------------------------------- +// init +//--------------------------------------------- + +static bool unix_detect_overcommit(void) { + bool os_overcommit = true; + #if defined(__linux__) + int fd = mi_prim_open("/proc/sys/vm/overcommit_memory", O_RDONLY); + if (fd >= 0) { + char buf[32]; + ssize_t nread = mi_prim_read(fd, &buf, sizeof(buf)); + mi_prim_close(fd); + // + // 0: heuristic overcommit, 1: always overcommit, 2: never overcommit (ignore NORESERVE) + if (nread >= 1) { + os_overcommit = (buf[0] == '0' || buf[0] == '1'); + } + } + #elif defined(__FreeBSD__) + int val = 0; + size_t olen = sizeof(val); + if (sysctlbyname("vm.overcommit", &val, &olen, NULL, 0) == 0) { + os_overcommit = (val != 0); + } + #else + // default: overcommit is true + #endif + return os_overcommit; +} + +static bool unix_detect_thp(void) { + bool thp_enabled = false; + #if defined(__linux__) + int fd = mi_prim_open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); + if (fd >= 0) { + char buf[32]; + ssize_t nread = mi_prim_read(fd, &buf, sizeof(buf)); + mi_prim_close(fd); + // + // between brackets is the current value, for example: always [madvise] never + if (nread >= 1) { + thp_enabled = (_mi_strnstr(buf,32,"[never]") == NULL); + } + } + #endif + return thp_enabled; +} + +// try to detect the physical memory dynamically (if possible) +static void unix_detect_physical_memory( size_t page_size, size_t* physical_memory_in_kib ) { + #if defined(CTL_HW) && (defined(HW_PHYSMEM64) || defined(HW_MEMSIZE)) // freeBSD, macOS + MI_UNUSED(page_size); + int64_t physical_memory = 0; + size_t length = sizeof(int64_t); + #if defined(HW_PHYSMEM64) + int mib[2] = { CTL_HW, HW_PHYSMEM64 }; + #else + int mib[2] = { CTL_HW, HW_MEMSIZE }; + #endif + const int err = sysctl(mib, 2, &physical_memory, &length, NULL, 0); + if (err==0 && physical_memory > 0) { + const int64_t phys_in_kib = physical_memory / MI_KiB; + if (phys_in_kib > 0 && (uint64_t)phys_in_kib <= SIZE_MAX) { + *physical_memory_in_kib = (size_t)phys_in_kib; + } + } + #elif defined(__linux__) + MI_UNUSED(page_size); + struct sysinfo info; _mi_memzero_var(info); + const int err = sysinfo(&info); + if (err==0 && info.totalram > 0 && info.totalram <= SIZE_MAX) { + *physical_memory_in_kib = (size_t)info.totalram / MI_KiB; + } + #elif defined(_SC_PHYS_PAGES) // do not use by default as it might cause allocation (by using `fopen` to parse /proc/meminfo) (issue #1100) + const long pphys = sysconf(_SC_PHYS_PAGES); + const size_t 
psize_in_kib = page_size / MI_KiB; + if (psize_in_kib > 0 && pphys > 0 && (unsigned long)pphys <= SIZE_MAX && (size_t)pphys <= (SIZE_MAX/psize_in_kib)) { + *physical_memory_in_kib = (size_t)pphys * psize_in_kib; + } + #endif +} + +void _mi_prim_mem_init( mi_os_mem_config_t* config ) +{ + long psize = sysconf(_SC_PAGESIZE); + if (psize > 0 && (unsigned long)psize < SIZE_MAX) { + config->page_size = (size_t)psize; + config->alloc_granularity = (size_t)psize; + unix_detect_physical_memory(config->page_size, &config->physical_memory_in_kib); + } + config->large_page_size = MI_UNIX_LARGE_PAGE_SIZE; + config->has_overcommit = unix_detect_overcommit(); + config->has_partial_free = true; // mmap can free in parts + config->has_virtual_reserve = true; // todo: check if this true for NetBSD? (for anonymous mmap with PROT_NONE) + config->has_transparent_huge_pages = unix_detect_thp(); + + // disable transparent huge pages for this process? + #if (defined(__linux__) || defined(__ANDROID__)) && defined(PR_GET_THP_DISABLE) + #if defined(MI_NO_THP) + if (true) + #else + if (!mi_option_is_enabled(mi_option_allow_thp)) // disable THP if requested through an option + #endif + { + config->has_transparent_huge_pages = false; + int val = 0; + if (prctl(PR_GET_THP_DISABLE, &val, 0, 0, 0) != 0) { + // Most likely since distros often come with always/madvise settings. + val = 1; + // Disabling only for mimalloc process rather than touching system wide settings + (void)prctl(PR_SET_THP_DISABLE, &val, 0, 0, 0); + } + } + #endif +} + + +//--------------------------------------------- +// free +//--------------------------------------------- + +int _mi_prim_free(void* addr, size_t size ) { + if (size==0) return 0; + bool err = (munmap(addr, size) == -1); + return (err ? errno : 0); +} + + +//--------------------------------------------- +// mmap +//--------------------------------------------- + +static int unix_madvise(void* addr, size_t size, int advice) { + #if defined(__sun) + int res = madvise((caddr_t)addr, size, advice); // Solaris needs cast (issue #520) + #elif defined(__QNX__) + int res = posix_madvise(addr, size, advice); + #else + int res = madvise(addr, size, advice); + #endif + return (res==0 ? 
0 : errno); +} + +static void* unix_mmap_prim(void* addr, size_t size, int protect_flags, int flags, int fd) { + void* p = mmap(addr, size, protect_flags, flags, fd, 0 /* offset */); + #if defined(__linux__) && defined(PR_SET_VMA) + if (p!=MAP_FAILED && p!=NULL) { + prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, p, size, "mimalloc"); + } + #endif + return p; +} + +static void* unix_mmap_prim_aligned(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { + MI_UNUSED(try_alignment); + void* p = NULL; + #if defined(MAP_ALIGNED) // BSD + if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { + size_t n = 0; + mi_bsr(try_alignment, &n); + if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB + p = unix_mmap_prim(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd); + if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { + int err = errno; + _mi_trace_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, addr); + } + if (p!=MAP_FAILED) return p; + // fall back to regular mmap + } + } + #elif defined(MAP_ALIGN) // Solaris + if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { + p = unix_mmap_prim((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd); // addr parameter is the required alignment + if (p!=MAP_FAILED) return p; + // fall back to regular mmap + } + #endif + #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) + // on 64-bit systems, use the virtual address area after 2TiB for 4MiB aligned allocations + if (addr == NULL) { + void* hint = _mi_os_get_aligned_hint(try_alignment, size); + if (hint != NULL) { + p = unix_mmap_prim(hint, size, protect_flags, flags, fd); + if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { + #if MI_TRACK_ENABLED // asan sometimes does not instrument errno correctly? + int err = 0; + #else + int err = errno; + #endif + _mi_trace_message("unable to directly request hinted aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint); + } + if (p!=MAP_FAILED) return p; + // fall back to regular mmap + } + } + #endif + // regular mmap + p = unix_mmap_prim(addr, size, protect_flags, flags, fd); + if (p!=MAP_FAILED) return p; + // failed to allocate + return NULL; +} + +static int unix_mmap_fd(void) { + #if defined(VM_MAKE_TAG) + // macOS: tracking anonymous page with a specific ID. 
(All up to 98 are taken officially but LLVM sanitizers had taken 99) + int os_tag = (int)mi_option_get(mi_option_os_tag); + if (os_tag < 100 || os_tag > 255) { os_tag = 254; } + return VM_MAKE_TAG(os_tag); + #else + return -1; + #endif +} + +static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { + #if !defined(MAP_ANONYMOUS) + #define MAP_ANONYMOUS MAP_ANON + #endif + #if !defined(MAP_NORESERVE) + #define MAP_NORESERVE 0 + #endif + void* p = NULL; + const int fd = unix_mmap_fd(); + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + if (_mi_os_has_overcommit()) { + flags |= MAP_NORESERVE; + } + #if defined(PROT_MAX) + protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD + #endif + // huge page allocation + if (allow_large && (large_only || (_mi_os_canuse_large_page(size, try_alignment) && mi_option_is_enabled(mi_option_allow_large_os_pages)))) { + static _Atomic(size_t) large_page_try_ok; // = 0; + size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); + if (!large_only && try_ok > 0) { + // If the OS is not configured for large OS pages, or the user does not have + // enough permission, the `mmap` will always fail (but it might also fail for other reasons). + // Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times + // to avoid too many failing calls to mmap. + mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); + } + else { + int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux + int lfd = fd; + #ifdef MAP_ALIGNED_SUPER + lflags |= MAP_ALIGNED_SUPER; + #endif + #ifdef MAP_HUGETLB + lflags |= MAP_HUGETLB; + #endif + #ifdef MAP_HUGE_1GB + static bool mi_huge_pages_available = true; + if (large_only && (size % MI_GiB) == 0 && mi_huge_pages_available) { + lflags |= MAP_HUGE_1GB; + } + else + #endif + { + #ifdef MAP_HUGE_2MB + lflags |= MAP_HUGE_2MB; + #endif + } + #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB + lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; + #endif + if (large_only || lflags != flags) { + // try large OS page allocation + *is_large = true; + p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, lflags, lfd); + #ifdef MAP_HUGE_1GB + if (p == NULL && (lflags & MAP_HUGE_1GB) == MAP_HUGE_1GB) { + mi_huge_pages_available = false; // don't try huge 1GiB pages again + if (large_only) { + _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno); + } + lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); + p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, lflags, lfd); + } + #endif + if (large_only) return p; + if (p == NULL) { + mi_atomic_store_release(&large_page_try_ok, (size_t)8); // on error, don't try again for the next N allocations + } + } + } + } + // regular allocation + if (p == NULL) { + *is_large = false; + p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, flags, fd); + #if !defined(MI_NO_THP) + if (p != NULL && allow_large && mi_option_is_enabled(mi_option_allow_thp) && _mi_os_canuse_large_page(size, try_alignment)) { + #if defined(MADV_HUGEPAGE) + // Many Linux systems don't allow MAP_HUGETLB but they support instead + // transparent huge pages (THP). Generally, it is not required to call `madvise` with MADV_HUGE + // though since properly aligned allocations will already use large pages if available + // in that case -- in particular for our large regions (in `memory.c`). 
+ // However, some systems only allow THP if called with explicit `madvise`, so + // when large OS pages are enabled for mimalloc, we call `madvise` anyways. + if (unix_madvise(p, size, MADV_HUGEPAGE) == 0) { + // *is_large = true; // possibly + }; + #elif defined(__sun) + struct memcntl_mha cmd = {0}; + cmd.mha_pagesize = _mi_os_large_page_size(); + cmd.mha_cmd = MHA_MAPSIZE_VA; + if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) { + // *is_large = true; // possibly + } + #endif + } + #endif + } + return p; +} + +// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. +int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) { + mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + mi_assert_internal(commit || !allow_large); + mi_assert_internal(try_alignment > 0); + if (hint_addr == NULL && size >= 8*MI_UNIX_LARGE_PAGE_SIZE && try_alignment > 1 && _mi_is_power_of_two(try_alignment) && try_alignment < MI_UNIX_LARGE_PAGE_SIZE) { + try_alignment = MI_UNIX_LARGE_PAGE_SIZE; // try to align along large page size for larger allocations + } + + *is_zero = true; + int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); + *addr = unix_mmap(hint_addr, size, try_alignment, protect_flags, false, allow_large, is_large); + return (*addr != NULL ? 0 : errno); +} + + +//--------------------------------------------- +// Commit/Reset +//--------------------------------------------- + +static void unix_mprotect_hint(int err) { + #if defined(__linux__) && (MI_SECURE>=2) // guard page around every mimalloc page + if (err == ENOMEM) { + _mi_warning_message("The next warning may be caused by a low memory map limit.\n" + " On Linux this is controlled by the vm.max_map_count -- maybe increase it?\n" + " For example: sudo sysctl -w vm.max_map_count=262144\n"); + } + #else + MI_UNUSED(err); + #endif +} + + + + + +int _mi_prim_commit(void* start, size_t size, bool* is_zero) { + // commit: ensure we can access the area + // note: we may think that *is_zero can be true since the memory + // was either from mmap PROT_NONE, or from decommit MADV_DONTNEED, but + // we sometimes call commit on a range with still partially committed + // memory and `mprotect` does not zero the range. 
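+  // Reporting is_zero=false is therefore the conservative choice; callers that need
+  // zeroed memory must clear it themselves.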
+ *is_zero = false; + int err = mprotect(start, size, (PROT_READ | PROT_WRITE)); + if (err != 0) { + err = errno; + unix_mprotect_hint(err); + } + return err; +} + +int _mi_prim_reuse(void* start, size_t size) { + MI_UNUSED(start); MI_UNUSED(size); + #if defined(__APPLE__) && defined(MADV_FREE_REUSE) + return unix_madvise(start, size, MADV_FREE_REUSE); + #endif + return 0; +} + +int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { + int err = 0; + #if defined(__APPLE__) && defined(MADV_FREE_REUSABLE) + // decommit on macOS: use MADV_FREE_REUSABLE as it does immediate rss accounting (issue #1097) + err = unix_madvise(start, size, MADV_FREE_REUSABLE); + if (err) { err = unix_madvise(start, size, MADV_DONTNEED); } + #else + // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) + err = unix_madvise(start, size, MADV_DONTNEED); + #endif + #if !MI_DEBUG && MI_SECURE<=2 + *needs_recommit = false; + #else + *needs_recommit = true; + mprotect(start, size, PROT_NONE); + #endif + /* + // decommit: use mmap with MAP_FIXED and PROT_NONE to discard the existing memory (and reduce rss) + *needs_recommit = true; + const int fd = unix_mmap_fd(); + void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); + if (p != start) { err = errno; } + */ + return err; +} + +int _mi_prim_reset(void* start, size_t size) { + int err = 0; + + // on macOS can use MADV_FREE_REUSABLE (but we disable this for now as it seems slower) + #if 0 && defined(__APPLE__) && defined(MADV_FREE_REUSABLE) + err = unix_madvise(start, size, MADV_FREE_REUSABLE); + if (err==0) return 0; + // fall through + #endif + + #if defined(MADV_FREE) + // Otherwise, we try to use `MADV_FREE` as that is the fastest. A drawback though is that it + // will not reduce the `rss` stats in tools like `top` even though the memory is available + // to other processes. With the default `MIMALLOC_PURGE_DECOMMITS=1` we ensure that by + // default `MADV_DONTNEED` is used though. + static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); + int oadvice = (int)mi_atomic_load_relaxed(&advice); + while ((err = unix_madvise(start, size, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; + if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) { + // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on + mi_atomic_store_release(&advice, (size_t)MADV_DONTNEED); + err = unix_madvise(start, size, MADV_DONTNEED); + } + #else + err = unix_madvise(start, size, MADV_DONTNEED); + #endif + return err; +} + +int _mi_prim_protect(void* start, size_t size, bool protect) { + int err = mprotect(start, size, protect ? 
PROT_NONE : (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + unix_mprotect_hint(err); + return err; +} + + + +//--------------------------------------------- +// Huge page allocation +//--------------------------------------------- + +#if (MI_INTPTR_SIZE >= 8) && !defined(__HAIKU__) && !defined(__CYGWIN__) + +#ifndef MPOL_PREFERRED +#define MPOL_PREFERRED 1 +#endif + +#if defined(MI_HAS_SYSCALL_H) && defined(SYS_mbind) +static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags); +} +#else +static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + MI_UNUSED(start); MI_UNUSED(len); MI_UNUSED(mode); MI_UNUSED(nmask); MI_UNUSED(maxnode); MI_UNUSED(flags); + return 0; +} +#endif + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { + bool is_large = true; + *is_zero = true; + *addr = unix_mmap(hint_addr, size, MI_ARENA_SLICE_ALIGN, PROT_READ | PROT_WRITE, true, true, &is_large); + if (*addr != NULL && numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes + unsigned long numa_mask = (1UL << numa_node); + // TODO: does `mbind` work correctly for huge OS pages? should we + // use `set_mempolicy` before calling mmap instead? + // see: + long err = mi_prim_mbind(*addr, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + if (err != 0) { + err = errno; + _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d (error: %d (0x%x))\n", numa_node, err, err); + } + } + return (*addr != NULL ? 0 : errno); +} + +#else + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { + MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); + *is_zero = false; + *addr = NULL; + return ENOMEM; +} + +#endif + +//--------------------------------------------- +// NUMA nodes +//--------------------------------------------- + +#if defined(__linux__) + +size_t _mi_prim_numa_node(void) { + #if defined(MI_HAS_SYSCALL_H) && defined(SYS_getcpu) + unsigned long node = 0; + unsigned long ncpu = 0; + long err = syscall(SYS_getcpu, &ncpu, &node, NULL); + if (err != 0) return 0; + return node; + #else + return 0; + #endif +} + +size_t _mi_prim_numa_node_count(void) { + char buf[128]; + unsigned node = 0; + for(node = 0; node < 256; node++) { + // enumerate node entries -- todo: it there a more efficient way to do this? (but ensure there is no allocation) + _mi_snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); + if (mi_prim_access(buf,R_OK) != 0) break; + } + return (node+1); +} + +#elif defined(__FreeBSD__) && __FreeBSD_version >= 1200000 + +size_t _mi_prim_numa_node(void) { + domainset_t dom; + size_t node; + int policy; + if (cpuset_getdomain(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, sizeof(dom), &dom, &policy) == -1) return 0ul; + for (node = 0; node < MAXMEMDOM; node++) { + if (DOMAINSET_ISSET(node, &dom)) return node; + } + return 0ul; +} + +size_t _mi_prim_numa_node_count(void) { + size_t ndomains = 0; + size_t len = sizeof(ndomains); + if (sysctlbyname("vm.ndomains", &ndomains, &len, NULL, 0) == -1) return 0ul; + return ndomains; +} + +#elif defined(__DragonFly__) + +size_t _mi_prim_numa_node(void) { + // TODO: DragonFly does not seem to provide any userland means to get this information. 
+ return 0ul; +} + +size_t _mi_prim_numa_node_count(void) { + size_t ncpus = 0, nvirtcoresperphys = 0; + size_t len = sizeof(size_t); + if (sysctlbyname("hw.ncpu", &ncpus, &len, NULL, 0) == -1) return 0ul; + if (sysctlbyname("hw.cpu_topology_ht_ids", &nvirtcoresperphys, &len, NULL, 0) == -1) return 0ul; + return nvirtcoresperphys * ncpus; +} + +#else + +size_t _mi_prim_numa_node(void) { + return 0; +} + +size_t _mi_prim_numa_node_count(void) { + return 1; +} + +#endif + +// ---------------------------------------------------------------- +// Clock +// ---------------------------------------------------------------- + +#include + +#if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC) + +mi_msecs_t _mi_prim_clock_now(void) { + struct timespec t; + #ifdef CLOCK_MONOTONIC + clock_gettime(CLOCK_MONOTONIC, &t); + #else + clock_gettime(CLOCK_REALTIME, &t); + #endif + return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); +} + +#else + +// low resolution timer +mi_msecs_t _mi_prim_clock_now(void) { + #if !defined(CLOCKS_PER_SEC) || (CLOCKS_PER_SEC == 1000) || (CLOCKS_PER_SEC == 0) + return (mi_msecs_t)clock(); + #elif (CLOCKS_PER_SEC < 1000) + return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC); + #else + return (mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000); + #endif +} + +#endif + + + + +//---------------------------------------------------------------- +// Process info +//---------------------------------------------------------------- + +#if defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__) +#include +#include +#include + +#if defined(__APPLE__) +#include +#endif + +#if defined(__HAIKU__) +#include +#endif + +static mi_msecs_t timeval_secs(const struct timeval* tv) { + return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); +} + +void _mi_prim_process_info(mi_process_info_t* pinfo) +{ + struct rusage rusage; + getrusage(RUSAGE_SELF, &rusage); + pinfo->utime = timeval_secs(&rusage.ru_utime); + pinfo->stime = timeval_secs(&rusage.ru_stime); +#if !defined(__HAIKU__) + pinfo->page_faults = rusage.ru_majflt; +#endif +#if defined(__HAIKU__) + // Haiku does not have (yet?) 
a way to + // get these stats per process + thread_info tid; + area_info mem; + ssize_t c; + get_thread_info(find_thread(0), &tid); + while (get_next_area_info(tid.team, &c, &mem) == B_OK) { + pinfo->peak_rss += mem.ram_size; + } + pinfo->page_faults = 0; +#elif defined(__APPLE__) + pinfo->peak_rss = rusage.ru_maxrss; // macos reports in bytes + #ifdef MACH_TASK_BASIC_INFO + struct mach_task_basic_info info; + mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; + if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { + pinfo->current_rss = (size_t)info.resident_size; + } + #else + struct task_basic_info info; + mach_msg_type_number_t infoCount = TASK_BASIC_INFO_COUNT; + if (task_info(mach_task_self(), TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { + pinfo->current_rss = (size_t)info.resident_size; + } + #endif +#else + pinfo->peak_rss = rusage.ru_maxrss * 1024; // Linux/BSD report in KiB +#endif + // use defaults for commit +} + +#else + +#ifndef __wasi__ +// WebAssembly instances are not processes +#pragma message("define a way to get process info") +#endif + +void _mi_prim_process_info(mi_process_info_t* pinfo) +{ + // use defaults + MI_UNUSED(pinfo); +} + +#endif + + +//---------------------------------------------------------------- +// Output +//---------------------------------------------------------------- + +void _mi_prim_out_stderr( const char* msg ) { + fputs(msg,stderr); +} + + +//---------------------------------------------------------------- +// Environment +//---------------------------------------------------------------- + +#if !defined(MI_USE_ENVIRON) || (MI_USE_ENVIRON!=0) +// On Posix systemsr use `environ` to access environment variables +// even before the C runtime is initialized. +#if defined(__APPLE__) && defined(__has_include) && __has_include() +#include +static char** mi_get_environ(void) { + return (*_NSGetEnviron()); +} +#else +extern char** environ; +static char** mi_get_environ(void) { + return environ; +} +#endif +bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { + if (name==NULL) return false; + const size_t len = _mi_strlen(name); + if (len == 0) return false; + char** env = mi_get_environ(); + if (env == NULL) return false; + // compare up to 10000 entries + for (int i = 0; i < 10000 && env[i] != NULL; i++) { + const char* s = env[i]; + if (_mi_strnicmp(name, s, len) == 0 && s[len] == '=') { // case insensitive + // found it + _mi_strlcpy(result, s + len + 1, result_size); + return true; + } + } + return false; +} +#else +// fallback: use standard C `getenv` but this cannot be used while initializing the C runtime +bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { + // cannot call getenv() when still initializing the C runtime. + if (_mi_preloading()) return false; + const char* s = getenv(name); + if (s == NULL) { + // we check the upper case name too. 
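+    // (e.g. a lookup for `mimalloc_verbose` will also find `MIMALLOC_VERBOSE`)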
+ char buf[64+1];
+ size_t len = _mi_strnlen(name,sizeof(buf)-1);
+ for (size_t i = 0; i < len; i++) {
+ buf[i] = _mi_toupper(name[i]);
+ }
+ buf[len] = 0;
+ s = getenv(buf);
+ }
+ if (s == NULL || _mi_strnlen(s,result_size) >= result_size) return false;
+ _mi_strlcpy(result, s, result_size);
+ return true;
+}
+#endif // !MI_USE_ENVIRON
+
+
+//----------------------------------------------------------------
+// Random
+//----------------------------------------------------------------
+
+#if defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_15) && (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_15)
+#include <CommonCrypto/CommonCryptoError.h>
+#include <CommonCrypto/CommonRandom.h>
+
+bool _mi_prim_random_buf(void* buf, size_t buf_len) {
+ // We prefer CCRandomGenerateBytes as it returns an error code while arc4random_buf
+ // may fail silently on macOS. See PR #390, and
+ return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess);
+}
+
+#elif defined(__ANDROID__) || defined(__DragonFly__) || \
+ defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
+ defined(__sun) || \
+ (defined(__APPLE__) && (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7))
+
+bool _mi_prim_random_buf(void* buf, size_t buf_len) {
+ arc4random_buf(buf, buf_len);
+ return true;
+}
+
+#elif defined(__APPLE__) || defined(__linux__) || defined(__HAIKU__) // also for old apple versions < 10.7 (issue #829)
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+bool _mi_prim_random_buf(void* buf, size_t buf_len) {
+ // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h`
+ // and for the latter the actual `getrandom` call is not always defined.
+ // (see )
+ // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed.
+ #if defined(MI_HAS_SYSCALL_H) && defined(SYS_getrandom)
+ #ifndef GRND_NONBLOCK
+ #define GRND_NONBLOCK (1)
+ #endif
+ static _Atomic(uintptr_t) no_getrandom; // = 0
+ if (mi_atomic_load_acquire(&no_getrandom)==0) {
+ ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK);
+ if (ret >= 0) return (buf_len == (size_t)ret);
+ if (errno != ENOSYS) return false;
+ mi_atomic_store_release(&no_getrandom, (uintptr_t)1); // don't call again, and fall back to /dev/urandom
+ }
+ #endif
+ int flags = O_RDONLY;
+ #if defined(O_CLOEXEC)
+ flags |= O_CLOEXEC;
+ #endif
+ int fd = mi_prim_open("/dev/urandom", flags);
+ if (fd < 0) return false;
+ size_t count = 0;
+ while(count < buf_len) {
+ ssize_t ret = mi_prim_read(fd, (char*)buf + count, buf_len - count);
+ if (ret<=0) {
+ if (errno!=EAGAIN && errno!=EINTR) break;
+ }
+ else {
+ count += ret;
+ }
+ }
+ mi_prim_close(fd);
+ return (count==buf_len);
+}
+
+#else
+
+bool _mi_prim_random_buf(void* buf, size_t buf_len) {
+ return false;
+}
+
+#endif
+
+
+//----------------------------------------------------------------
+// Thread init/done
+//----------------------------------------------------------------
+
+#if defined(MI_USE_PTHREADS)
+
+// use pthread local storage keys to detect thread ending
+// (and used with MI_TLS_PTHREADS for the default theap)
+pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1);
+
+static void mi_pthread_done(void* value) {
+ if (value!=NULL) {
+ _mi_thread_done((mi_theap_t*)value);
+ }
+}
+
+void _mi_prim_thread_init_auto_done(void) {
+ mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1));
+ pthread_key_create(&_mi_heap_default_key, &mi_pthread_done);
+}
+
+void _mi_prim_thread_done_auto_done(void) {
+ if (_mi_heap_default_key != (pthread_key_t)(-1)) { // do not leak the key, see issue #809
+ pthread_key_delete(_mi_heap_default_key);
+ }
+}
+
+void _mi_prim_thread_associate_default_theap(mi_theap_t* theap) {
+ if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD
+ pthread_setspecific(_mi_heap_default_key, theap);
+ }
+}
+
+#else
+
+void _mi_prim_thread_init_auto_done(void) {
+ // nothing
+}
+
+void _mi_prim_thread_done_auto_done(void) {
+ // nothing
+}
+
+void _mi_prim_thread_associate_default_theap(mi_theap_t* theap) {
+ MI_UNUSED(theap);
+}
+
+#endif
+
+bool _mi_prim_thread_is_in_threadpool(void) {
+ return false;
+}
diff --git a/3rd/mimalloc-3.2.6/src/prim/wasi/prim.c b/3rd/mimalloc-3.2.6/src/prim/wasi/prim.c
new file mode 100644
index 00000000..4e6270dd
--- /dev/null
+++ b/3rd/mimalloc-3.2.6/src/prim/wasi/prim.c
@@ -0,0 +1,288 @@
+/* ----------------------------------------------------------------------------
+Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
+This is free software; you can redistribute it and/or modify it under the
+terms of the MIT license. A copy of the license can be found in the file
+"LICENSE" at the root of this distribution.
+-----------------------------------------------------------------------------*/
+
+// This file is included in `src/prim/prim.c`
+
+#include "mimalloc.h"
+#include "mimalloc/internal.h"
+#include "mimalloc/prim.h"
+
+#include <stdio.h> // fputs
+#include <stdlib.h> // getenv
+
+//---------------------------------------------
+// Initialize
+//---------------------------------------------
+
+void _mi_prim_mem_init( mi_os_mem_config_t* config ) {
+ config->page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB
+ config->alloc_granularity = 16;
+ config->has_overcommit = false;
+ config->has_partial_free = false;
+ config->has_virtual_reserve = false;
+}
+
+//---------------------------------------------
+// Free
+//---------------------------------------------
+
+int _mi_prim_free(void* addr, size_t size ) {
+ MI_UNUSED(addr); MI_UNUSED(size);
+ // wasi theap cannot be shrunk
+ return 0;
+}
+
+
+//---------------------------------------------
+// Allocation: sbrk or memory_grow
+//---------------------------------------------
+
+#if defined(MI_USE_SBRK)
+ #include <unistd.h> // for sbrk
+
+ static void* mi_memory_grow( size_t size ) {
+ void* p = sbrk(size);
+ if (p == (void*)(-1)) return NULL;
+ #if !defined(__wasi__) // on wasi this is always zero initialized already (?)
+ memset(p,0,size);
+ #endif
+ return p;
+ }
+#elif defined(__wasi__)
+ static void* mi_memory_grow( size_t size ) {
+ size_t base = (size > 0 ?
__builtin_wasm_memory_grow(0,_mi_divide_up(size, _mi_os_page_size())) + : __builtin_wasm_memory_size(0)); + if (base == SIZE_MAX) return NULL; + return (void*)(base * _mi_os_page_size()); + } +#endif + +#if defined(MI_USE_PTHREADS) +static pthread_mutex_t mi_theap_grow_mutex = PTHREAD_MUTEX_INITIALIZER; +#endif + +static void* mi_prim_mem_grow(size_t size, size_t try_alignment) { + void* p = NULL; + if (try_alignment <= 1) { + // `sbrk` is not thread safe in general so try to protect it (we could skip this on WASM but leave it in for now) + #if defined(MI_USE_PTHREADS) + pthread_mutex_lock(&mi_theap_grow_mutex); + #endif + p = mi_memory_grow(size); + #if defined(MI_USE_PTHREADS) + pthread_mutex_unlock(&mi_theap_grow_mutex); + #endif + } + else { + void* base = NULL; + size_t alloc_size = 0; + // to allocate aligned use a lock to try to avoid thread interaction + // between getting the current size and actual allocation + // (also, `sbrk` is not thread safe in general) + #if defined(MI_USE_PTHREADS) + pthread_mutex_lock(&mi_theap_grow_mutex); + #endif + { + void* current = mi_memory_grow(0); // get current size + if (current != NULL) { + void* aligned_current = mi_align_up_ptr(current, try_alignment); // and align from there to minimize wasted space + alloc_size = _mi_align_up( ((uint8_t*)aligned_current - (uint8_t*)current) + size, _mi_os_page_size()); + base = mi_memory_grow(alloc_size); + } + } + #if defined(MI_USE_PTHREADS) + pthread_mutex_unlock(&mi_theap_grow_mutex); + #endif + if (base != NULL) { + p = mi_align_up_ptr(base, try_alignment); + if ((uint8_t*)p + size > (uint8_t*)base + alloc_size) { + // another thread used wasm_memory_grow/sbrk in-between and we do not have enough + // space after alignment. Give up (and waste the space as we cannot shrink :-( ) + // (in `mi_os_mem_alloc_aligned` this will fall back to overallocation to align) + p = NULL; + } + } + } + /* + if (p == NULL) { + _mi_warning_message("unable to allocate sbrk/wasm_memory_grow OS memory (%zu bytes, %zu alignment)\n", size, try_alignment); + errno = ENOMEM; + return NULL; + } + */ + mi_assert_internal( p == NULL || try_alignment == 0 || (uintptr_t)p % try_alignment == 0 ); + return p; +} + +// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. +int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) { + MI_UNUSED(allow_large); MI_UNUSED(commit); MI_UNUSED(hint_addr); + *is_large = false; + *is_zero = false; + *addr = mi_prim_mem_grow(size, try_alignment); + return (*addr != NULL ? 
0 : ENOMEM);
+}
+
+
+//---------------------------------------------
+// Commit/Reset/Protect
+//---------------------------------------------
+
+int _mi_prim_commit(void* addr, size_t size, bool* is_zero) {
+ MI_UNUSED(addr); MI_UNUSED(size);
+ *is_zero = false;
+ return 0;
+}
+
+int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) {
+ MI_UNUSED(addr); MI_UNUSED(size);
+ *needs_recommit = false;
+ return 0;
+}
+
+int _mi_prim_reset(void* addr, size_t size) {
+ MI_UNUSED(addr); MI_UNUSED(size);
+ return 0;
+}
+
+int _mi_prim_reuse(void* addr, size_t size) {
+ MI_UNUSED(addr); MI_UNUSED(size);
+ return 0;
+}
+
+int _mi_prim_protect(void* addr, size_t size, bool protect) {
+ MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(protect);
+ return 0;
+}
+
+
+//---------------------------------------------
+// Huge pages and NUMA nodes
+//---------------------------------------------
+
+int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
+ MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node);
+ *is_zero = true;
+ *addr = NULL;
+ return ENOSYS;
+}
+
+size_t _mi_prim_numa_node(void) {
+ return 0;
+}
+
+size_t _mi_prim_numa_node_count(void) {
+ return 1;
+}
+
+
+//----------------------------------------------------------------
+// Clock
+//----------------------------------------------------------------
+
+#include <time.h>
+
+#if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC)
+
+mi_msecs_t _mi_prim_clock_now(void) {
+ struct timespec t;
+ #ifdef CLOCK_MONOTONIC
+ clock_gettime(CLOCK_MONOTONIC, &t);
+ #else
+ clock_gettime(CLOCK_REALTIME, &t);
+ #endif
+ return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000);
+}
+
+#else
+
+// low resolution timer
+mi_msecs_t _mi_prim_clock_now(void) {
+ #if !defined(CLOCKS_PER_SEC) || (CLOCKS_PER_SEC == 1000) || (CLOCKS_PER_SEC == 0)
+ return (mi_msecs_t)clock();
+ #elif (CLOCKS_PER_SEC < 1000)
+ return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC);
+ #else
+ return (mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000);
+ #endif
+}
+
+#endif
+
+
+//----------------------------------------------------------------
+// Process info
+//----------------------------------------------------------------
+
+void _mi_prim_process_info(mi_process_info_t* pinfo)
+{
+ // use defaults
+ MI_UNUSED(pinfo);
+}
+
+
+//----------------------------------------------------------------
+// Output
+//----------------------------------------------------------------
+
+void _mi_prim_out_stderr( const char* msg ) {
+ fputs(msg,stderr);
+}
+
+
+//----------------------------------------------------------------
+// Environment
+//----------------------------------------------------------------
+
+bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
+ // cannot call getenv() when still initializing the C runtime.
+ if (_mi_preloading()) return false;
+ const char* s = getenv(name);
+ if (s == NULL) {
+ // we check the upper case name too.
+ char buf[64+1]; + size_t len = _mi_strnlen(name,sizeof(buf)-1); + for (size_t i = 0; i < len; i++) { + buf[i] = _mi_toupper(name[i]); + } + buf[len] = 0; + s = getenv(buf); + } + if (s == NULL || _mi_strnlen(s,result_size) >= result_size) return false; + _mi_strlcpy(result, s, result_size); + return true; +} + + +//---------------------------------------------------------------- +// Random +//---------------------------------------------------------------- + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + return false; +} + + +//---------------------------------------------------------------- +// Thread init/done +//---------------------------------------------------------------- + +void _mi_prim_thread_init_auto_done(void) { + // nothing +} + +void _mi_prim_thread_done_auto_done(void) { + // nothing +} + +void _mi_prim_thread_associate_default_theap(mi_theap_t* theap) { + MI_UNUSED(theap); +} + +bool _mi_prim_thread_is_in_threadpool(void) { + return false; +} diff --git a/3rd/mimalloc-3.2.6/src/prim/windows/etw-mimalloc.wprp b/3rd/mimalloc-3.2.6/src/prim/windows/etw-mimalloc.wprp new file mode 100644 index 00000000..b00cd7ad --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/prim/windows/etw-mimalloc.wprp @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/3rd/mimalloc-3.2.6/src/prim/windows/etw.h b/3rd/mimalloc-3.2.6/src/prim/windows/etw.h new file mode 100644 index 00000000..4e0a092a --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/prim/windows/etw.h @@ -0,0 +1,905 @@ +//**********************************************************************` +//* This is an include file generated by Message Compiler. *` +//* *` +//* Copyright (c) Microsoft Corporation. All Rights Reserved. *` +//**********************************************************************` +#pragma once + +//***************************************************************************** +// +// Notes on the ETW event code generated by MC: +// +// - Structures and arrays of structures are treated as an opaque binary blob. +// The caller is responsible for packing the data for the structure into a +// single region of memory, with no padding between values. The macro will +// have an extra parameter for the length of the blob. +// - Arrays of nul-terminated strings must be packed by the caller into a +// single binary blob containing the correct number of strings, with a nul +// after each string. The size of the blob is specified in characters, and +// includes the final nul. +// - Arrays of SID are treated as a single binary blob. The caller is +// responsible for packing the SID values into a single region of memory with +// no padding. +// - The length attribute on the data element in the manifest is significant +// for values with intype win:UnicodeString, win:AnsiString, or win:Binary. +// The length attribute must be specified for win:Binary, and is optional for +// win:UnicodeString and win:AnsiString (if no length is given, the strings +// are assumed to be nul-terminated). For win:UnicodeString, the length is +// measured in characters, not bytes. +// - For an array of win:UnicodeString, win:AnsiString, or win:Binary, the +// length attribute applies to every value in the array, so every value in +// the array must have the same length. 
The values in the array are provided +// to the macro via a single pointer -- the caller is responsible for packing +// all of the values into a single region of memory with no padding between +// values. +// - Values of type win:CountedUnicodeString, win:CountedAnsiString, and +// win:CountedBinary can be generated and collected on Vista or later. +// However, they may not decode properly without the Windows 10 2018 Fall +// Update. +// - Arrays of type win:CountedUnicodeString, win:CountedAnsiString, and +// win:CountedBinary must be packed by the caller into a single region of +// memory. The format for each item is a UINT16 byte-count followed by that +// many bytes of data. When providing the array to the generated macro, you +// must provide the total size of the packed array data, including the UINT16 +// sizes for each item. In the case of win:CountedUnicodeString, the data +// size is specified in WCHAR (16-bit) units. In the case of +// win:CountedAnsiString and win:CountedBinary, the data size is specified in +// bytes. +// +//***************************************************************************** + +#include +#include +#include + +#ifndef ETW_INLINE + #ifdef _ETW_KM_ + // In kernel mode, save stack space by never inlining templates. + #define ETW_INLINE DECLSPEC_NOINLINE __inline + #else + // In user mode, save code size by inlining templates as appropriate. + #define ETW_INLINE __inline + #endif +#endif // ETW_INLINE + +#if defined(__cplusplus) +extern "C" { +#endif + +// +// MCGEN_DISABLE_PROVIDER_CODE_GENERATION macro: +// Define this macro to have the compiler skip the generated functions in this +// header. +// +#ifndef MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +// +// MCGEN_USE_KERNEL_MODE_APIS macro: +// Controls whether the generated code uses kernel-mode or user-mode APIs. +// - Set to 0 to use Windows user-mode APIs such as EventRegister. +// - Set to 1 to use Windows kernel-mode APIs such as EtwRegister. +// Default is based on whether the _ETW_KM_ macro is defined (i.e. by wdm.h). +// Note that the APIs can also be overridden directly, e.g. by setting the +// MCGEN_EVENTWRITETRANSFER or MCGEN_EVENTREGISTER macros. +// +#ifndef MCGEN_USE_KERNEL_MODE_APIS + #ifdef _ETW_KM_ + #define MCGEN_USE_KERNEL_MODE_APIS 1 + #else + #define MCGEN_USE_KERNEL_MODE_APIS 0 + #endif +#endif // MCGEN_USE_KERNEL_MODE_APIS + +// +// MCGEN_HAVE_EVENTSETINFORMATION macro: +// Controls how McGenEventSetInformation uses the EventSetInformation API. +// - Set to 0 to disable the use of EventSetInformation +// (McGenEventSetInformation will always return an error). +// - Set to 1 to directly invoke MCGEN_EVENTSETINFORMATION. +// - Set to 2 to to locate EventSetInformation at runtime via GetProcAddress +// (user-mode) or MmGetSystemRoutineAddress (kernel-mode). +// Default is determined as follows: +// - If MCGEN_EVENTSETINFORMATION has been customized, set to 1 +// (i.e. use MCGEN_EVENTSETINFORMATION). +// - Else if the target OS version has EventSetInformation, set to 1 +// (i.e. use MCGEN_EVENTSETINFORMATION). +// - Else set to 2 (i.e. try to dynamically locate EventSetInformation). +// Note that an McGenEventSetInformation function will only be generated if one +// or more provider in a manifest has provider traits. +// +#ifndef MCGEN_HAVE_EVENTSETINFORMATION + #ifdef MCGEN_EVENTSETINFORMATION // if MCGEN_EVENTSETINFORMATION has been customized, + #define MCGEN_HAVE_EVENTSETINFORMATION 1 // directly invoke MCGEN_EVENTSETINFORMATION(...). 
+ #elif MCGEN_USE_KERNEL_MODE_APIS // else if using kernel-mode APIs, + #if NTDDI_VERSION >= 0x06040000 // if target OS is Windows 10 or later, + #define MCGEN_HAVE_EVENTSETINFORMATION 1 // directly invoke MCGEN_EVENTSETINFORMATION(...). + #else // else + #define MCGEN_HAVE_EVENTSETINFORMATION 2 // find "EtwSetInformation" via MmGetSystemRoutineAddress. + #endif // else (using user-mode APIs) + #else // if target OS and SDK is Windows 8 or later, + #if WINVER >= 0x0602 && defined(EVENT_FILTER_TYPE_SCHEMATIZED) + #define MCGEN_HAVE_EVENTSETINFORMATION 1 // directly invoke MCGEN_EVENTSETINFORMATION(...). + #else // else + #define MCGEN_HAVE_EVENTSETINFORMATION 2 // find "EventSetInformation" via GetModuleHandleExW/GetProcAddress. + #endif + #endif +#endif // MCGEN_HAVE_EVENTSETINFORMATION + +// +// MCGEN Override Macros +// +// The following override macros may be defined before including this header +// to control the APIs used by this header: +// +// - MCGEN_EVENTREGISTER +// - MCGEN_EVENTUNREGISTER +// - MCGEN_EVENTSETINFORMATION +// - MCGEN_EVENTWRITETRANSFER +// +// If the the macro is undefined, the MC implementation will default to the +// corresponding ETW APIs. For example, if the MCGEN_EVENTREGISTER macro is +// undefined, the EventRegister[MyProviderName] macro will use EventRegister +// in user mode and will use EtwRegister in kernel mode. +// +// To prevent issues from conflicting definitions of these macros, the value +// of the override macro will be used as a suffix in certain internal function +// names. Because of this, the override macros must follow certain rules: +// +// - The macro must be defined before any MC-generated header is included and +// must not be undefined or redefined after any MC-generated header is +// included. Different translation units (i.e. different .c or .cpp files) +// may set the macros to different values, but within a translation unit +// (within a single .c or .cpp file), the macro must be set once and not +// changed. +// - The override must be an object-like macro, not a function-like macro +// (i.e. the override macro must not have a parameter list). +// - The override macro's value must be a simple identifier, i.e. must be +// something that starts with a letter or '_' and contains only letters, +// numbers, and '_' characters. +// - If the override macro's value is the name of a second object-like macro, +// the second object-like macro must follow the same rules. (The override +// macro's value can also be the name of a function-like macro, in which +// case the function-like macro does not need to follow the same rules.) +// +// For example, the following will cause compile errors: +// +// #define MCGEN_EVENTWRITETRANSFER MyNamespace::MyClass::MyFunction // Value has non-identifier characters (colon). +// #define MCGEN_EVENTWRITETRANSFER GetEventWriteFunctionPointer(7) // Value has non-identifier characters (parentheses). +// #define MCGEN_EVENTWRITETRANSFER(h,e,a,r,c,d) EventWrite(h,e,c,d) // Override is defined as a function-like macro. +// #define MY_OBJECT_LIKE_MACRO MyNamespace::MyClass::MyEventWriteFunction +// #define MCGEN_EVENTWRITETRANSFER MY_OBJECT_LIKE_MACRO // Evaluates to something with non-identifier characters (colon). +// +// The following would be ok: +// +// #define MCGEN_EVENTWRITETRANSFER MyEventWriteFunction1 // OK, suffix will be "MyEventWriteFunction1". +// #define MY_OBJECT_LIKE_MACRO MyEventWriteFunction2 +// #define MCGEN_EVENTWRITETRANSFER MY_OBJECT_LIKE_MACRO // OK, suffix will be "MyEventWriteFunction2". 
+// #define MY_FUNCTION_LIKE_MACRO(h,e,a,r,c,d) MyNamespace::MyClass::MyEventWriteFunction3(h,e,c,d) +// #define MCGEN_EVENTWRITETRANSFER MY_FUNCTION_LIKE_MACRO // OK, suffix will be "MY_FUNCTION_LIKE_MACRO". +// +#ifndef MCGEN_EVENTREGISTER + #if MCGEN_USE_KERNEL_MODE_APIS + #define MCGEN_EVENTREGISTER EtwRegister + #else + #define MCGEN_EVENTREGISTER EventRegister + #endif +#endif // MCGEN_EVENTREGISTER +#ifndef MCGEN_EVENTUNREGISTER + #if MCGEN_USE_KERNEL_MODE_APIS + #define MCGEN_EVENTUNREGISTER EtwUnregister + #else + #define MCGEN_EVENTUNREGISTER EventUnregister + #endif +#endif // MCGEN_EVENTUNREGISTER +#ifndef MCGEN_EVENTSETINFORMATION + #if MCGEN_USE_KERNEL_MODE_APIS + #define MCGEN_EVENTSETINFORMATION EtwSetInformation + #else + #define MCGEN_EVENTSETINFORMATION EventSetInformation + #endif +#endif // MCGEN_EVENTSETINFORMATION +#ifndef MCGEN_EVENTWRITETRANSFER + #if MCGEN_USE_KERNEL_MODE_APIS + #define MCGEN_EVENTWRITETRANSFER EtwWriteTransfer + #else + #define MCGEN_EVENTWRITETRANSFER EventWriteTransfer + #endif +#endif // MCGEN_EVENTWRITETRANSFER + +// +// MCGEN_EVENT_ENABLED macro: +// Override to control how the EventWrite[EventName] macros determine whether +// an event is enabled. The default behavior is for EventWrite[EventName] to +// use the EventEnabled[EventName] macros. +// +#ifndef MCGEN_EVENT_ENABLED +#define MCGEN_EVENT_ENABLED(EventName) EventEnabled##EventName() +#endif + +// +// MCGEN_EVENT_ENABLED_FORCONTEXT macro: +// Override to control how the EventWrite[EventName]_ForContext macros +// determine whether an event is enabled. The default behavior is for +// EventWrite[EventName]_ForContext to use the +// EventEnabled[EventName]_ForContext macros. +// +#ifndef MCGEN_EVENT_ENABLED_FORCONTEXT +#define MCGEN_EVENT_ENABLED_FORCONTEXT(pContext, EventName) EventEnabled##EventName##_ForContext(pContext) +#endif + +// +// MCGEN_ENABLE_CHECK macro: +// Determines whether the specified event would be considered as enabled +// based on the state of the specified context. Slightly faster than calling +// McGenEventEnabled directly. +// +#ifndef MCGEN_ENABLE_CHECK +#define MCGEN_ENABLE_CHECK(Context, Descriptor) (Context.IsEnabled && McGenEventEnabled(&Context, &Descriptor)) +#endif + +#if !defined(MCGEN_TRACE_CONTEXT_DEF) +#define MCGEN_TRACE_CONTEXT_DEF +// This structure is for use by MC-generated code and should not be used directly. +typedef struct _MCGEN_TRACE_CONTEXT +{ + TRACEHANDLE RegistrationHandle; + TRACEHANDLE Logger; // Used as pointer to provider traits. + ULONGLONG MatchAnyKeyword; + ULONGLONG MatchAllKeyword; + ULONG Flags; + ULONG IsEnabled; + UCHAR Level; + UCHAR Reserve; + USHORT EnableBitsCount; + PULONG EnableBitMask; + const ULONGLONG* EnableKeyWords; + const UCHAR* EnableLevel; +} MCGEN_TRACE_CONTEXT, *PMCGEN_TRACE_CONTEXT; +#endif // MCGEN_TRACE_CONTEXT_DEF + +#if !defined(MCGEN_LEVEL_KEYWORD_ENABLED_DEF) +#define MCGEN_LEVEL_KEYWORD_ENABLED_DEF +// +// Determines whether an event with a given Level and Keyword would be +// considered as enabled based on the state of the specified context. +// Note that you may want to use MCGEN_ENABLE_CHECK instead of calling this +// function directly. +// +FORCEINLINE +BOOLEAN +McGenLevelKeywordEnabled( + _In_ PMCGEN_TRACE_CONTEXT EnableInfo, + _In_ UCHAR Level, + _In_ ULONGLONG Keyword + ) +{ + // + // Check if the event Level is lower than the level at which + // the channel is enabled. + // If the event Level is 0 or the channel is enabled at level 0, + // all levels are enabled. 
+ // + + if ((Level <= EnableInfo->Level) || // This also covers the case of Level == 0. + (EnableInfo->Level == 0)) { + + // + // Check if Keyword is enabled + // + + if ((Keyword == (ULONGLONG)0) || + ((Keyword & EnableInfo->MatchAnyKeyword) && + ((Keyword & EnableInfo->MatchAllKeyword) == EnableInfo->MatchAllKeyword))) { + return TRUE; + } + } + + return FALSE; +} +#endif // MCGEN_LEVEL_KEYWORD_ENABLED_DEF + +#if !defined(MCGEN_EVENT_ENABLED_DEF) +#define MCGEN_EVENT_ENABLED_DEF +// +// Determines whether the specified event would be considered as enabled based +// on the state of the specified context. Note that you may want to use +// MCGEN_ENABLE_CHECK instead of calling this function directly. +// +FORCEINLINE +BOOLEAN +McGenEventEnabled( + _In_ PMCGEN_TRACE_CONTEXT EnableInfo, + _In_ PCEVENT_DESCRIPTOR EventDescriptor + ) +{ + return McGenLevelKeywordEnabled(EnableInfo, EventDescriptor->Level, EventDescriptor->Keyword); +} +#endif // MCGEN_EVENT_ENABLED_DEF + +#if !defined(MCGEN_CONTROL_CALLBACK) +#define MCGEN_CONTROL_CALLBACK + +// This function is for use by MC-generated code and should not be used directly. +DECLSPEC_NOINLINE __inline +VOID +__stdcall +McGenControlCallbackV2( + _In_ LPCGUID SourceId, + _In_ ULONG ControlCode, + _In_ UCHAR Level, + _In_ ULONGLONG MatchAnyKeyword, + _In_ ULONGLONG MatchAllKeyword, + _In_opt_ PEVENT_FILTER_DESCRIPTOR FilterData, + _Inout_opt_ PVOID CallbackContext + ) +/*++ + +Routine Description: + + This is the notification callback for Windows Vista and later. + +Arguments: + + SourceId - The GUID that identifies the session that enabled the provider. + + ControlCode - The parameter indicates whether the provider + is being enabled or disabled. + + Level - The level at which the event is enabled. + + MatchAnyKeyword - The bitmask of keywords that the provider uses to + determine the category of events that it writes. + + MatchAllKeyword - This bitmask additionally restricts the category + of events that the provider writes. + + FilterData - The provider-defined data. + + CallbackContext - The context of the callback that is defined when the provider + called EtwRegister to register itself. 
+ +Remarks: + + ETW calls this function to notify provider of enable/disable + +--*/ +{ + PMCGEN_TRACE_CONTEXT Ctx = (PMCGEN_TRACE_CONTEXT)CallbackContext; + ULONG Ix; +#ifndef MCGEN_PRIVATE_ENABLE_CALLBACK_V2 + UNREFERENCED_PARAMETER(SourceId); + UNREFERENCED_PARAMETER(FilterData); +#endif + + if (Ctx == NULL) { + return; + } + + switch (ControlCode) { + + case EVENT_CONTROL_CODE_ENABLE_PROVIDER: + Ctx->Level = Level; + Ctx->MatchAnyKeyword = MatchAnyKeyword; + Ctx->MatchAllKeyword = MatchAllKeyword; + Ctx->IsEnabled = EVENT_CONTROL_CODE_ENABLE_PROVIDER; + + for (Ix = 0; Ix < Ctx->EnableBitsCount; Ix += 1) { + if (McGenLevelKeywordEnabled(Ctx, Ctx->EnableLevel[Ix], Ctx->EnableKeyWords[Ix]) != FALSE) { + Ctx->EnableBitMask[Ix >> 5] |= (1 << (Ix % 32)); + } else { + Ctx->EnableBitMask[Ix >> 5] &= ~(1 << (Ix % 32)); + } + } + break; + + case EVENT_CONTROL_CODE_DISABLE_PROVIDER: + Ctx->IsEnabled = EVENT_CONTROL_CODE_DISABLE_PROVIDER; + Ctx->Level = 0; + Ctx->MatchAnyKeyword = 0; + Ctx->MatchAllKeyword = 0; + if (Ctx->EnableBitsCount > 0) { +#pragma warning(suppress: 26451) // Arithmetic overflow cannot occur, no matter the value of EnableBitCount + RtlZeroMemory(Ctx->EnableBitMask, (((Ctx->EnableBitsCount - 1) / 32) + 1) * sizeof(ULONG)); + } + break; + + default: + break; + } + +#ifdef MCGEN_PRIVATE_ENABLE_CALLBACK_V2 + // + // Call user defined callback + // + MCGEN_PRIVATE_ENABLE_CALLBACK_V2( + SourceId, + ControlCode, + Level, + MatchAnyKeyword, + MatchAllKeyword, + FilterData, + CallbackContext + ); +#endif // MCGEN_PRIVATE_ENABLE_CALLBACK_V2 + + return; +} + +#endif // MCGEN_CONTROL_CALLBACK + +#ifndef _mcgen_PENABLECALLBACK + #if MCGEN_USE_KERNEL_MODE_APIS + #define _mcgen_PENABLECALLBACK PETWENABLECALLBACK + #else + #define _mcgen_PENABLECALLBACK PENABLECALLBACK + #endif +#endif // _mcgen_PENABLECALLBACK + +#if !defined(_mcgen_PASTE2) +// This macro is for use by MC-generated code and should not be used directly. +#define _mcgen_PASTE2(a, b) _mcgen_PASTE2_imp(a, b) +#define _mcgen_PASTE2_imp(a, b) a##b +#endif // _mcgen_PASTE2 + +#if !defined(_mcgen_PASTE3) +// This macro is for use by MC-generated code and should not be used directly. 
+#define _mcgen_PASTE3(a, b, c) _mcgen_PASTE3_imp(a, b, c) +#define _mcgen_PASTE3_imp(a, b, c) a##b##_##c +#endif // _mcgen_PASTE3 + +// +// Macro validation +// + +// Validate MCGEN_EVENTREGISTER: + +// Trigger an error if MCGEN_EVENTREGISTER is not an unqualified (simple) identifier: +struct _mcgen_PASTE2(MCGEN_EVENTREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTREGISTER); + +// Trigger an error if MCGEN_EVENTREGISTER is redefined: +typedef struct _mcgen_PASTE2(MCGEN_EVENTREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTREGISTER) + MCGEN_EVENTREGISTER_must_not_be_redefined_between_headers; + +// Trigger an error if MCGEN_EVENTREGISTER is defined as a function-like macro: +typedef void MCGEN_EVENTREGISTER_must_not_be_a_functionLike_macro_MCGEN_EVENTREGISTER; +typedef int _mcgen_PASTE2(MCGEN_EVENTREGISTER_must_not_be_a_functionLike_macro_, MCGEN_EVENTREGISTER); + +// Validate MCGEN_EVENTUNREGISTER: + +// Trigger an error if MCGEN_EVENTUNREGISTER is not an unqualified (simple) identifier: +struct _mcgen_PASTE2(MCGEN_EVENTUNREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTUNREGISTER); + +// Trigger an error if MCGEN_EVENTUNREGISTER is redefined: +typedef struct _mcgen_PASTE2(MCGEN_EVENTUNREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTUNREGISTER) + MCGEN_EVENTUNREGISTER_must_not_be_redefined_between_headers; + +// Trigger an error if MCGEN_EVENTUNREGISTER is defined as a function-like macro: +typedef void MCGEN_EVENTUNREGISTER_must_not_be_a_functionLike_macro_MCGEN_EVENTUNREGISTER; +typedef int _mcgen_PASTE2(MCGEN_EVENTUNREGISTER_must_not_be_a_functionLike_macro_, MCGEN_EVENTUNREGISTER); + +// Validate MCGEN_EVENTSETINFORMATION: + +// Trigger an error if MCGEN_EVENTSETINFORMATION is not an unqualified (simple) identifier: +struct _mcgen_PASTE2(MCGEN_EVENTSETINFORMATION_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTSETINFORMATION); + +// Trigger an error if MCGEN_EVENTSETINFORMATION is redefined: +typedef struct _mcgen_PASTE2(MCGEN_EVENTSETINFORMATION_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTSETINFORMATION) + MCGEN_EVENTSETINFORMATION_must_not_be_redefined_between_headers; + +// Trigger an error if MCGEN_EVENTSETINFORMATION is defined as a function-like macro: +typedef void MCGEN_EVENTSETINFORMATION_must_not_be_a_functionLike_macro_MCGEN_EVENTSETINFORMATION; +typedef int _mcgen_PASTE2(MCGEN_EVENTSETINFORMATION_must_not_be_a_functionLike_macro_, MCGEN_EVENTSETINFORMATION); + +// Validate MCGEN_EVENTWRITETRANSFER: + +// Trigger an error if MCGEN_EVENTWRITETRANSFER is not an unqualified (simple) identifier: +struct _mcgen_PASTE2(MCGEN_EVENTWRITETRANSFER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTWRITETRANSFER); + +// Trigger an error if MCGEN_EVENTWRITETRANSFER is redefined: +typedef struct _mcgen_PASTE2(MCGEN_EVENTWRITETRANSFER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTWRITETRANSFER) + MCGEN_EVENTWRITETRANSFER_must_not_be_redefined_between_headers;; + +// Trigger an error if MCGEN_EVENTWRITETRANSFER is defined as a function-like macro: +typedef void MCGEN_EVENTWRITETRANSFER_must_not_be_a_functionLike_macro_MCGEN_EVENTWRITETRANSFER; +typedef int _mcgen_PASTE2(MCGEN_EVENTWRITETRANSFER_must_not_be_a_functionLike_macro_, MCGEN_EVENTWRITETRANSFER); + +#ifndef McGenEventWrite_def +#define McGenEventWrite_def + +// This macro is for use by MC-generated code and should not be used directly. 
+#define McGenEventWrite _mcgen_PASTE2(McGenEventWrite_, MCGEN_EVENTWRITETRANSFER) + +// This function is for use by MC-generated code and should not be used directly. +DECLSPEC_NOINLINE __inline +ULONG __stdcall +McGenEventWrite( + _In_ PMCGEN_TRACE_CONTEXT Context, + _In_ PCEVENT_DESCRIPTOR Descriptor, + _In_opt_ LPCGUID ActivityId, + _In_range_(1, 128) ULONG EventDataCount, + _Pre_cap_(EventDataCount) EVENT_DATA_DESCRIPTOR* EventData + ) +{ + const USHORT UNALIGNED* Traits; + + // Some customized MCGEN_EVENTWRITETRANSFER macros might ignore ActivityId. + UNREFERENCED_PARAMETER(ActivityId); + + Traits = (const USHORT UNALIGNED*)(UINT_PTR)Context->Logger; + + if (Traits == NULL) { + EventData[0].Ptr = 0; + EventData[0].Size = 0; + EventData[0].Reserved = 0; + } else { + EventData[0].Ptr = (ULONG_PTR)Traits; + EventData[0].Size = *Traits; + EventData[0].Reserved = 2; // EVENT_DATA_DESCRIPTOR_TYPE_PROVIDER_METADATA + } + + return MCGEN_EVENTWRITETRANSFER( + Context->RegistrationHandle, + Descriptor, + ActivityId, + NULL, + EventDataCount, + EventData); +} +#endif // McGenEventWrite_def + +#if !defined(McGenEventRegisterUnregister) +#define McGenEventRegisterUnregister + +// This macro is for use by MC-generated code and should not be used directly. +#define McGenEventRegister _mcgen_PASTE2(McGenEventRegister_, MCGEN_EVENTREGISTER) + +#pragma warning(push) +#pragma warning(disable:6103) +// This function is for use by MC-generated code and should not be used directly. +DECLSPEC_NOINLINE __inline +ULONG __stdcall +McGenEventRegister( + _In_ LPCGUID ProviderId, + _In_opt_ _mcgen_PENABLECALLBACK EnableCallback, + _In_opt_ PVOID CallbackContext, + _Inout_ PREGHANDLE RegHandle + ) +/*++ + +Routine Description: + + This function registers the provider with ETW. + +Arguments: + + ProviderId - Provider ID to register with ETW. + + EnableCallback - Callback to be used. + + CallbackContext - Context for the callback. + + RegHandle - Pointer to registration handle. + +Remarks: + + Should not be called if the provider is already registered (i.e. should not + be called if *RegHandle != 0). Repeatedly registering a provider is a bug + and may indicate a race condition. However, for compatibility with previous + behavior, this function will return SUCCESS in this case. + +--*/ +{ + ULONG Error; + + if (*RegHandle != 0) + { + Error = 0; // ERROR_SUCCESS + } + else + { + Error = MCGEN_EVENTREGISTER(ProviderId, EnableCallback, CallbackContext, RegHandle); + } + + return Error; +} +#pragma warning(pop) + +// This macro is for use by MC-generated code and should not be used directly. +#define McGenEventUnregister _mcgen_PASTE2(McGenEventUnregister_, MCGEN_EVENTUNREGISTER) + +// This function is for use by MC-generated code and should not be used directly. +DECLSPEC_NOINLINE __inline +ULONG __stdcall +McGenEventUnregister(_Inout_ PREGHANDLE RegHandle) +/*++ + +Routine Description: + + Unregister from ETW and set *RegHandle = 0. + +Arguments: + + RegHandle - the pointer to the provider registration handle + +Remarks: + + If provider has not been registered (i.e. if *RegHandle == 0), + return SUCCESS. It is safe to call McGenEventUnregister even if the + call to McGenEventRegister returned an error. 
+ +--*/ +{ + ULONG Error; + + if(*RegHandle == 0) + { + Error = 0; // ERROR_SUCCESS + } + else + { + Error = MCGEN_EVENTUNREGISTER(*RegHandle); + *RegHandle = (REGHANDLE)0; + } + + return Error; +} + +#endif // McGenEventRegisterUnregister + +#ifndef _mcgen_EVENT_BIT_SET + #if defined(_M_IX86) || defined(_M_X64) + // This macro is for use by MC-generated code and should not be used directly. + #define _mcgen_EVENT_BIT_SET(EnableBits, BitPosition) ((((const unsigned char*)EnableBits)[BitPosition >> 3] & (1u << (BitPosition & 7))) != 0) + #else // CPU type + // This macro is for use by MC-generated code and should not be used directly. + #define _mcgen_EVENT_BIT_SET(EnableBits, BitPosition) ((EnableBits[BitPosition >> 5] & (1u << (BitPosition & 31))) != 0) + #endif // CPU type +#endif // _mcgen_EVENT_BIT_SET + +#endif // MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +//+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +// Provider "microsoft-windows-mimalloc" event count 2 +//+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +// Provider GUID = 138f4dbb-ee04-4899-aa0a-572ad4475779 +EXTERN_C __declspec(selectany) const GUID ETW_MI_Provider = {0x138f4dbb, 0xee04, 0x4899, {0xaa, 0x0a, 0x57, 0x2a, 0xd4, 0x47, 0x57, 0x79}}; + +#ifndef ETW_MI_Provider_Traits +#define ETW_MI_Provider_Traits NULL +#endif // ETW_MI_Provider_Traits + +// +// Event Descriptors +// +EXTERN_C __declspec(selectany) const EVENT_DESCRIPTOR ETW_MI_ALLOC = {0x64, 0x1, 0x0, 0x4, 0x0, 0x0, 0x0}; +#define ETW_MI_ALLOC_value 0x64 +EXTERN_C __declspec(selectany) const EVENT_DESCRIPTOR ETW_MI_FREE = {0x65, 0x1, 0x0, 0x4, 0x0, 0x0, 0x0}; +#define ETW_MI_FREE_value 0x65 + +// +// MCGEN_DISABLE_PROVIDER_CODE_GENERATION macro: +// Define this macro to have the compiler skip the generated functions in this +// header. +// +#ifndef MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +// +// Event Enablement Bits +// These variables are for use by MC-generated code and should not be used directly. +// +EXTERN_C __declspec(selectany) DECLSPEC_CACHEALIGN ULONG microsoft_windows_mimallocEnableBits[1]; +EXTERN_C __declspec(selectany) const ULONGLONG microsoft_windows_mimallocKeywords[1] = {0x0}; +EXTERN_C __declspec(selectany) const unsigned char microsoft_windows_mimallocLevels[1] = {4}; + +// +// Provider context +// +EXTERN_C __declspec(selectany) MCGEN_TRACE_CONTEXT ETW_MI_Provider_Context = {0, (ULONG_PTR)ETW_MI_Provider_Traits, 0, 0, 0, 0, 0, 0, 1, microsoft_windows_mimallocEnableBits, microsoft_windows_mimallocKeywords, microsoft_windows_mimallocLevels}; + +// +// Provider REGHANDLE +// +#define microsoft_windows_mimallocHandle (ETW_MI_Provider_Context.RegistrationHandle) + +// +// This macro is set to 0, indicating that the EventWrite[Name] macros do not +// have an Activity parameter. This is controlled by the -km and -um options. +// +#define ETW_MI_Provider_EventWriteActivity 0 + +// +// Register with ETW using the control GUID specified in the manifest. +// Invoke this macro during module initialization (i.e. program startup, +// DLL process attach, or driver load) to initialize the provider. +// Note that if this function returns an error, the error means that +// will not work, but no action needs to be taken -- even if EventRegister +// returns an error, it is generally safe to use EventWrite and +// EventUnregister macros (they will be no-ops if EventRegister failed). 
+//
+#ifndef EventRegistermicrosoft_windows_mimalloc
+#define EventRegistermicrosoft_windows_mimalloc() McGenEventRegister(&ETW_MI_Provider, McGenControlCallbackV2, &ETW_MI_Provider_Context, &microsoft_windows_mimallocHandle)
+#endif
+
+//
+// Register with ETW using a specific control GUID (i.e. a GUID other than what
+// is specified in the manifest). Advanced scenarios only.
+//
+#ifndef EventRegisterByGuidmicrosoft_windows_mimalloc
+#define EventRegisterByGuidmicrosoft_windows_mimalloc(Guid) McGenEventRegister(&(Guid), McGenControlCallbackV2, &ETW_MI_Provider_Context, &microsoft_windows_mimallocHandle)
+#endif
+
+//
+// Unregister with ETW and close the provider.
+// Invoke this macro during module shutdown (i.e. program exit, DLL process
+// detach, or driver unload) to unregister the provider.
+// Note that you MUST call EventUnregister before DLL or driver unload
+// (not optional): failure to unregister a provider before DLL or driver unload
+// will result in crashes.
+//
+#ifndef EventUnregistermicrosoft_windows_mimalloc
+#define EventUnregistermicrosoft_windows_mimalloc() McGenEventUnregister(&microsoft_windows_mimallocHandle)
+#endif
+
+//
+// MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION macro:
+// Define this macro to enable support for caller-allocated provider context.
+//
+#ifdef MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION
+
+//
+// Advanced scenarios: Caller-allocated provider context.
+// Use when multiple differently-configured provider handles are needed,
+// e.g. for container-aware drivers, one context per container.
+//
+// Usage:
+//
+// - Caller enables the feature before including this header, e.g.
+// #define MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION 1
+// - Caller allocates memory, e.g. pContext = malloc(sizeof(McGenContext_microsoft_windows_mimalloc));
+// - Caller registers the provider, e.g. EventRegistermicrosoft_windows_mimalloc_ForContext(pContext);
+// - Caller writes events, e.g. EventWriteMyEvent_ForContext(pContext, ...);
+// - Caller unregisters, e.g. EventUnregistermicrosoft_windows_mimalloc_ForContext(pContext);
+// - Caller frees memory, e.g. free(pContext);
+//
+
+typedef struct tagMcGenContext_microsoft_windows_mimalloc {
+ // The fields of this structure are subject to change and should
+ // not be accessed directly. To access the provider's REGHANDLE,
+ // use microsoft_windows_mimallocHandle_ForContext(pContext).
+ MCGEN_TRACE_CONTEXT Context;
+ ULONG EnableBits[1];
+} McGenContext_microsoft_windows_mimalloc;
+
+#define EventRegistermicrosoft_windows_mimalloc_ForContext(pContext) _mcgen_PASTE2(_mcgen_RegisterForContext_microsoft_windows_mimalloc_, MCGEN_EVENTREGISTER)(&ETW_MI_Provider, pContext)
+#define EventRegisterByGuidmicrosoft_windows_mimalloc_ForContext(Guid, pContext) _mcgen_PASTE2(_mcgen_RegisterForContext_microsoft_windows_mimalloc_, MCGEN_EVENTREGISTER)(&(Guid), pContext)
+#define EventUnregistermicrosoft_windows_mimalloc_ForContext(pContext) McGenEventUnregister(&(pContext)->Context.RegistrationHandle)
+
+//
+// Provider REGHANDLE for caller-allocated context.
+//
+#define microsoft_windows_mimallocHandle_ForContext(pContext) ((pContext)->Context.RegistrationHandle)
+
+// This function is for use by MC-generated code and should not be used directly.
+// Initialize and register the caller-allocated context.
+__inline +ULONG __stdcall +_mcgen_PASTE2(_mcgen_RegisterForContext_microsoft_windows_mimalloc_, MCGEN_EVENTREGISTER)( + _In_ LPCGUID pProviderId, + _Out_ McGenContext_microsoft_windows_mimalloc* pContext) +{ + RtlZeroMemory(pContext, sizeof(*pContext)); + pContext->Context.Logger = (ULONG_PTR)ETW_MI_Provider_Traits; + pContext->Context.EnableBitsCount = 1; + pContext->Context.EnableBitMask = pContext->EnableBits; + pContext->Context.EnableKeyWords = microsoft_windows_mimallocKeywords; + pContext->Context.EnableLevel = microsoft_windows_mimallocLevels; + return McGenEventRegister( + pProviderId, + McGenControlCallbackV2, + &pContext->Context, + &pContext->Context.RegistrationHandle); +} + +// This function is for use by MC-generated code and should not be used directly. +// Trigger a compile error if called with the wrong parameter type. +FORCEINLINE +_Ret_ McGenContext_microsoft_windows_mimalloc* +_mcgen_CheckContextType_microsoft_windows_mimalloc(_In_ McGenContext_microsoft_windows_mimalloc* pContext) +{ + return pContext; +} + +#endif // MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION + +// +// Enablement check macro for event "ETW_MI_ALLOC" +// +#define EventEnabledETW_MI_ALLOC() _mcgen_EVENT_BIT_SET(microsoft_windows_mimallocEnableBits, 0) +#define EventEnabledETW_MI_ALLOC_ForContext(pContext) _mcgen_EVENT_BIT_SET(_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->EnableBits, 0) + +// +// Event write macros for event "ETW_MI_ALLOC" +// +#define EventWriteETW_MI_ALLOC(Address, Size) \ + MCGEN_EVENT_ENABLED(ETW_MI_ALLOC) \ + ? _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&ETW_MI_Provider_Context, &ETW_MI_ALLOC, Address, Size) : 0 +#define EventWriteETW_MI_ALLOC_AssumeEnabled(Address, Size) \ + _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&ETW_MI_Provider_Context, &ETW_MI_ALLOC, Address, Size) +#define EventWriteETW_MI_ALLOC_ForContext(pContext, Address, Size) \ + MCGEN_EVENT_ENABLED_FORCONTEXT(pContext, ETW_MI_ALLOC) \ + ? _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&(pContext)->Context, &ETW_MI_ALLOC, Address, Size) : 0 +#define EventWriteETW_MI_ALLOC_ForContextAssumeEnabled(pContext, Address, Size) \ + _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->Context, &ETW_MI_ALLOC, Address, Size) + +// This macro is for use by MC-generated code and should not be used directly. +#define _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC _mcgen_PASTE2(McTemplateU0xx_, MCGEN_EVENTWRITETRANSFER) + +// +// Enablement check macro for event "ETW_MI_FREE" +// +#define EventEnabledETW_MI_FREE() _mcgen_EVENT_BIT_SET(microsoft_windows_mimallocEnableBits, 0) +#define EventEnabledETW_MI_FREE_ForContext(pContext) _mcgen_EVENT_BIT_SET(_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->EnableBits, 0) + +// +// Event write macros for event "ETW_MI_FREE" +// +#define EventWriteETW_MI_FREE(Address, Size) \ + MCGEN_EVENT_ENABLED(ETW_MI_FREE) \ + ? _mcgen_TEMPLATE_FOR_ETW_MI_FREE(&ETW_MI_Provider_Context, &ETW_MI_FREE, Address, Size) : 0 +#define EventWriteETW_MI_FREE_AssumeEnabled(Address, Size) \ + _mcgen_TEMPLATE_FOR_ETW_MI_FREE(&ETW_MI_Provider_Context, &ETW_MI_FREE, Address, Size) +#define EventWriteETW_MI_FREE_ForContext(pContext, Address, Size) \ + MCGEN_EVENT_ENABLED_FORCONTEXT(pContext, ETW_MI_FREE) \ + ? 
_mcgen_TEMPLATE_FOR_ETW_MI_FREE(&(pContext)->Context, &ETW_MI_FREE, Address, Size) : 0 +#define EventWriteETW_MI_FREE_ForContextAssumeEnabled(pContext, Address, Size) \ + _mcgen_TEMPLATE_FOR_ETW_MI_FREE(&_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->Context, &ETW_MI_FREE, Address, Size) + +// This macro is for use by MC-generated code and should not be used directly. +#define _mcgen_TEMPLATE_FOR_ETW_MI_FREE _mcgen_PASTE2(McTemplateU0xx_, MCGEN_EVENTWRITETRANSFER) + +#endif // MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +// +// MCGEN_DISABLE_PROVIDER_CODE_GENERATION macro: +// Define this macro to have the compiler skip the generated functions in this +// header. +// +#ifndef MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +// +// Template Functions +// + +// +// Function for template "ETW_CUSTOM_HEAP_ALLOC_DATA" (and possibly others). +// This function is for use by MC-generated code and should not be used directly. +// +#ifndef McTemplateU0xx_def +#define McTemplateU0xx_def +ETW_INLINE +ULONG +_mcgen_PASTE2(McTemplateU0xx_, MCGEN_EVENTWRITETRANSFER)( + _In_ PMCGEN_TRACE_CONTEXT Context, + _In_ PCEVENT_DESCRIPTOR Descriptor, + _In_ const unsigned __int64 _Arg0, + _In_ const unsigned __int64 _Arg1 + ) +{ +#define McTemplateU0xx_ARGCOUNT 2 + + EVENT_DATA_DESCRIPTOR EventData[McTemplateU0xx_ARGCOUNT + 1]; + + EventDataDescCreate(&EventData[1],&_Arg0, sizeof(const unsigned __int64) ); + + EventDataDescCreate(&EventData[2],&_Arg1, sizeof(const unsigned __int64) ); + + return McGenEventWrite(Context, Descriptor, NULL, McTemplateU0xx_ARGCOUNT + 1, EventData); +} +#endif // McTemplateU0xx_def + +#endif // MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +#if defined(__cplusplus) +} +#endif diff --git a/3rd/mimalloc-3.2.6/src/prim/windows/etw.man b/3rd/mimalloc-3.2.6/src/prim/windows/etw.man new file mode 100644 index 0000000000000000000000000000000000000000..cfd1f8a9eaacd50af63f1e28f9540aa88c20f90c GIT binary patch literal 3926 zcmeH~T~8B16o${WiT`2c+NGc<*i;EYh$d8xl<0*CS-Mb~vPP(R>hsQY*nU!q z$b})3?##}d?{nTW+uy%xwr*doYaNU1!Vc}sa%7F%g+hVAmL$hwL?4dodnmuAKhUXm0)X9|WHj>XRahh@~SWDIkbduX;B#u6^Q>@U= zDO8U+KXa0*th&%f*z^Udh4ol@uE=Q&`emUsh_CA`FOXgI{i-`XZ9C#bR1yBm=PJ*p z9kVN$J6O;h;8HY>p)RnhY8A#Hb?z)_!y(Iaen(I)@-9CrSSp(;_Jn9I*$S&ATjP1? zVxB>pV@LVsy;^jZr7r$HNAm06TcUiI`l@~F$Mt$E%Shfd3O+h1vFhR9a8yQZ@wpne zr3bI-p=VEdo{)#uWxSVJeYQF|-5tnq>~f+CP~A0&{v=(up=ngEDl>5!$EDwPRaNjW zXj|wbG$OwmwaW-hMvBK%pbm3wpic71O^dzbxT%*13+SP9h- zI~rA5hapTVnk|qmiIVYy{__+x9f7OV4j3`g4;{{8_SWnLBSu2PUc%~`t%Ae^>J`SS zdtZg-r<0xAH*_ALtK;Nv(d9nbKK1jK=Ld)I(jQrKhBjgToR#Wm8{0a}@6ZuEO(lvG=y=Jh{M!4!-$(E)!vYUrf47 znf4W$e&QFOovV_$>J%X*KN>oXI@jt%BJms>IU}I$<7Hr{PChchs%+mx{% zt(fa_PM4r63?1h#YOk~;byc5`>%q>sLHA1gohNq{qOREhxu<y~`}YVhGe0?R_ceQ8vt^BpSNp6kErj_0hUNFyWQ1IOZ|GEgWBPwYFLgFu Oo!*uqEBu%Ae18C_t1cS= literal 0 HcmV?d00001 diff --git a/3rd/mimalloc-3.2.6/src/prim/windows/prim.c b/3rd/mimalloc-3.2.6/src/prim/windows/prim.c new file mode 100644 index 00000000..27d0a38e --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/prim/windows/prim.c @@ -0,0 +1,921 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. 
+-----------------------------------------------------------------------------*/ + +// This file is included in `src/prim/prim.c` + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" +#include // fputs, stderr + +// xbox has no console IO +#if !defined(WINAPI_FAMILY_PARTITION) || WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_SYSTEM) +#define MI_HAS_CONSOLE_IO +#endif + +//--------------------------------------------- +// Dynamically bind Windows API points for portability +//--------------------------------------------- + +#if defined(_MSC_VER) +#pragma warning(disable:4996) // don't use GetVersionExW +#endif + +static DWORD win_major_version = 6; +static DWORD win_minor_version = 0; + +// We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016. +// So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility) +// NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) +// We define a minimal MEM_EXTENDED_PARAMETER ourselves in order to be able to compile with older SDK's. +typedef enum MI_MEM_EXTENDED_PARAMETER_TYPE_E { + MiMemExtendedParameterInvalidType = 0, + MiMemExtendedParameterAddressRequirements, + MiMemExtendedParameterNumaNode, + MiMemExtendedParameterPartitionHandle, + MiMemExtendedParameterUserPhysicalHandle, + MiMemExtendedParameterAttributeFlags, + MiMemExtendedParameterMax +} MI_MEM_EXTENDED_PARAMETER_TYPE; + +typedef struct DECLSPEC_ALIGN(8) MI_MEM_EXTENDED_PARAMETER_S { + struct { DWORD64 Type : 8; DWORD64 Reserved : 56; } Type; + union { DWORD64 ULong64; PVOID Pointer; SIZE_T Size; HANDLE Handle; DWORD ULong; } Arg; +} MI_MEM_EXTENDED_PARAMETER; + +typedef struct MI_MEM_ADDRESS_REQUIREMENTS_S { + PVOID LowestStartingAddress; + PVOID HighestEndingAddress; + SIZE_T Alignment; +} MI_MEM_ADDRESS_REQUIREMENTS; + +#define MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE 0x00000010 + +#include +typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); +typedef LONG (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); // avoid NTSTATUS as it is not defined on xbox (pr #1084) +static PVirtualAlloc2 pVirtualAlloc2 = NULL; +static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; + +// Similarly, GetNumaProcessorNodeEx is only supported since Windows 7 (and GetNumaNodeProcessorMask is not supported on xbox) +typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } MI_PROCESSOR_NUMBER; + +typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber); +typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber); +typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask); +typedef BOOL (__stdcall *PGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber); +typedef BOOL (__stdcall* PGetNumaNodeProcessorMask)(UCHAR Node, PULONGLONG ProcessorMask); +typedef BOOL (__stdcall* PGetNumaHighestNodeNumber)(PULONG Node); +static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL; +static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL; +static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL; +static PGetNumaProcessorNode pGetNumaProcessorNode = NULL; +static PGetNumaNodeProcessorMask pGetNumaNodeProcessorMask = NULL; +static PGetNumaHighestNodeNumber 
pGetNumaHighestNodeNumber = NULL; + +// Not available on xbox +typedef SIZE_T(__stdcall* PGetLargePageMinimum)(VOID); +static PGetLargePageMinimum pGetLargePageMinimum = NULL; + +// Available after Windows XP +typedef BOOL (__stdcall *PGetPhysicallyInstalledSystemMemory)( PULONGLONG TotalMemoryInKilobytes ); +typedef BOOL (__stdcall* PGetVersionExW)(LPOSVERSIONINFOW lpVersionInformation); + + +//--------------------------------------------- +// Enable large page support dynamically (if possible) +//--------------------------------------------- + +static bool win_enable_large_os_pages(size_t* large_page_size) +{ + static bool large_initialized = false; + if (large_initialized) return (_mi_os_large_page_size() > 0); + large_initialized = true; + if (pGetLargePageMinimum==NULL) return false; // no large page support (xbox etc.) + + // Try to see if large OS pages are supported + // To use large pages on Windows, we first need access permission + // Set "Lock pages in memory" permission in the group policy editor + // + unsigned long err = 0; + HANDLE token = NULL; + BOOL ok = OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); + if (ok) { + TOKEN_PRIVILEGES tp; + ok = LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid); + if (ok) { + tp.PrivilegeCount = 1; + tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + ok = AdjustTokenPrivileges(token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0); + if (ok) { + err = GetLastError(); + ok = (err == ERROR_SUCCESS); + if (ok && large_page_size != NULL && pGetLargePageMinimum != NULL) { + *large_page_size = (*pGetLargePageMinimum)(); + } + } + } + CloseHandle(token); + } + if (!ok) { + if (err == 0) err = GetLastError(); + _mi_warning_message("cannot enable large OS page support, error %lu\n", err); + } + return (ok!=0); +} + + +//--------------------------------------------- +// Initialize +//--------------------------------------------- + +static DWORD win_allocation_granularity = 64*MI_KiB; + +void _mi_prim_mem_init( mi_os_mem_config_t* config ) +{ + config->has_overcommit = false; + config->has_partial_free = false; + config->has_virtual_reserve = true; + + // get the page size + SYSTEM_INFO si; _mi_memzero_var(si); + GetSystemInfo(&si); + if (si.dwPageSize > 0) { config->page_size = si.dwPageSize; } + if (si.dwAllocationGranularity > 0) { + config->alloc_granularity = si.dwAllocationGranularity; + win_allocation_granularity = si.dwAllocationGranularity; + } + // get virtual address bits + if ((uintptr_t)si.lpMaximumApplicationAddress > 0) { + const size_t vbits = MI_SIZE_BITS - mi_clz((uintptr_t)si.lpMaximumApplicationAddress); + config->virtual_address_bits = vbits; + } + + // get the VirtualAlloc2 function + HINSTANCE hDll = LoadLibrary(TEXT("kernelbase.dll")); + if (hDll != NULL) { + // use VirtualAlloc2FromApp if possible as it is available to Windows store apps + pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); + if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); + FreeLibrary(hDll); + } + // NtAllocateVirtualMemoryEx is used for huge page allocation + hDll = LoadLibrary(TEXT("ntdll.dll")); + if (hDll != NULL) { + pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx"); + FreeLibrary(hDll); + } + // Try to use Win7+ numa API + hDll = LoadLibrary(TEXT("kernel32.dll")); + if (hDll != NULL) { + 
pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx"); + pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx"); + pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx"); + pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode"); + pGetNumaNodeProcessorMask = (PGetNumaNodeProcessorMask)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMask"); + pGetNumaHighestNodeNumber = (PGetNumaHighestNodeNumber)(void (*)(void))GetProcAddress(hDll, "GetNumaHighestNodeNumber"); + pGetLargePageMinimum = (PGetLargePageMinimum)(void (*)(void))GetProcAddress(hDll, "GetLargePageMinimum"); + // Get physical memory (not available on XP, so check dynamically) + PGetPhysicallyInstalledSystemMemory pGetPhysicallyInstalledSystemMemory = (PGetPhysicallyInstalledSystemMemory)(void (*)(void))GetProcAddress(hDll,"GetPhysicallyInstalledSystemMemory"); + if (pGetPhysicallyInstalledSystemMemory != NULL) { + ULONGLONG memInKiB = 0; + if ((*pGetPhysicallyInstalledSystemMemory)(&memInKiB)) { + if (memInKiB > 0 && memInKiB <= SIZE_MAX) { + config->physical_memory_in_kib = (size_t)memInKiB; + } + } + } + // Get Windows version + PGetVersionExW pGetVersionExW = (PGetVersionExW)(void (*)(void))GetProcAddress(hDll, "GetVersionExW"); + if (pGetVersionExW != NULL) { + OSVERSIONINFOW version; _mi_memzero_var(version); + version.dwOSVersionInfoSize = sizeof(version); + if ((*pGetVersionExW)(&version)) { + win_major_version = version.dwMajorVersion; + win_minor_version = version.dwMinorVersion; + } + } + FreeLibrary(hDll); + } + // Enable large/huge OS page support? + if (mi_option_is_enabled(mi_option_allow_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { + win_enable_large_os_pages(&config->large_page_size); + } +} + + +//--------------------------------------------- +// Free +//--------------------------------------------- + +int _mi_prim_free(void* addr, size_t size ) { + MI_UNUSED(size); + DWORD errcode = 0; + bool err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); + if (err) { errcode = GetLastError(); } + if (errcode == ERROR_INVALID_ADDRESS) { + // In mi_os_mem_alloc_aligned the fallback path may have returned a pointer inside + // the memory region returned by VirtualAlloc; in that case we need to free using + // the start of the region. 
+ MEMORY_BASIC_INFORMATION info; _mi_memzero_var(info);
+ VirtualQuery(addr, &info, sizeof(info));
+ if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < (ptrdiff_t)(4*MI_MiB)) {
+ errcode = 0;
+ err = (VirtualFree(info.AllocationBase, 0, MEM_RELEASE) == 0);
+ if (err) { errcode = GetLastError(); }
+ }
+ }
+ return (int)errcode;
+}
+
+
+//---------------------------------------------
+// VirtualAlloc
+//---------------------------------------------
+
+static void* win_virtual_alloc_prim_once(void* addr, size_t size, size_t try_alignment, DWORD flags) {
+ #if (MI_INTPTR_SIZE >= 8)
+ // on 64-bit systems, try to use the virtual address area after 2TiB for 4MiB aligned allocations
+ if (addr == NULL) {
+ void* hint = _mi_os_get_aligned_hint(try_alignment,size);
+ if (hint != NULL) {
+ void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE);
+ if (p != NULL) return p;
+ _mi_verbose_message("warning: unable to allocate hinted aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), hint, try_alignment, flags);
+ // fall through on error
+ }
+ }
+ #endif
+ // on modern Windows try use VirtualAlloc2 for aligned allocation
+ if (addr == NULL && try_alignment > win_allocation_granularity && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
+ MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 };
+ reqs.Alignment = try_alignment;
+ MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} };
+ param.Type.Type = MiMemExtendedParameterAddressRequirements;
+ param.Arg.Pointer = &reqs;
+ void* p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, &param, 1);
+ if (p != NULL) return p;
+ _mi_warning_message("unable to allocate aligned OS memory (0x%zx bytes, error code: 0x%x, address: %p, alignment: 0x%zx, flags: 0x%x)\n", size, GetLastError(), addr, try_alignment, flags);
+ // fall through on error
+ }
+ // last resort
+ return VirtualAlloc(addr, size, flags, PAGE_READWRITE);
+}
+
+static bool win_is_out_of_memory_error(DWORD err) {
+ switch (err) {
+ case ERROR_COMMITMENT_MINIMUM:
+ case ERROR_COMMITMENT_LIMIT:
+ case ERROR_PAGEFILE_QUOTA:
+ case ERROR_NOT_ENOUGH_MEMORY:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static void* win_virtual_alloc_prim(void* addr, size_t size, size_t try_alignment, DWORD flags) {
+ long max_retry_msecs = mi_option_get_clamp(mi_option_retry_on_oom, 0, 2000); // at most 2 seconds
+ if (max_retry_msecs == 1) { max_retry_msecs = 100; } // if one sets the option to "true"
+ for (long tries = 1; tries <= 10; tries++) { // try at most 10 times (=2200ms)
+ void* p = win_virtual_alloc_prim_once(addr, size, try_alignment, flags);
+ if (p != NULL) {
+ // success, return the address
+ return p;
+ }
+ else if (max_retry_msecs > 0 && (try_alignment <= 8*MI_MiB) &&
+ (flags&MEM_COMMIT) != 0 && (flags&MEM_LARGE_PAGES) == 0 &&
+ win_is_out_of_memory_error(GetLastError())) {
+ // if committing regular memory and being out-of-memory,
+ // keep trying for a bit in case memory frees up after all. See issue #894
+ _mi_warning_message("out-of-memory on OS allocation, try again...
(attempt %lu, 0x%zx bytes, error code: 0x%x, address: %p, alignment: 0x%zx, flags: 0x%x)\n", tries, size, GetLastError(), addr, try_alignment, flags); + long sleep_msecs = tries*40; // increasing waits + if (sleep_msecs > max_retry_msecs) { sleep_msecs = max_retry_msecs; } + max_retry_msecs -= sleep_msecs; + Sleep(sleep_msecs); + } + else { + // otherwise return with an error + break; + } + } + return NULL; +} + +static void* win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) { + mi_assert_internal(!(large_only && !allow_large)); + static _Atomic(size_t) large_page_try_ok; // = 0; + void* p = NULL; + // Try to allocate large OS pages (2MiB) if allowed or required. + if ((large_only || (_mi_os_canuse_large_page(size, try_alignment) && mi_option_is_enabled(mi_option_allow_large_os_pages))) + && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) + { + size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); + if (!large_only && try_ok > 0) { + // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. + // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. + mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); + } + else { + // large OS pages must always reserve and commit. + *is_large = true; + p = win_virtual_alloc_prim(addr, size, try_alignment, flags | MEM_LARGE_PAGES); + if (large_only) return p; + // fall back to non-large page allocation on error (`p == NULL`). + if (p == NULL) { + mi_atomic_store_release(&large_page_try_ok,10UL); // on error, don't try again for the next N allocations + } + } + } + // Fall back to regular page allocation + if (p == NULL) { + *is_large = ((flags&MEM_LARGE_PAGES) != 0); + p = win_virtual_alloc_prim(addr, size, try_alignment, flags); + } + //if (p == NULL) { _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large); } + return p; +} + +int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) { + mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + mi_assert_internal(commit || !allow_large); + mi_assert_internal(try_alignment > 0); + *is_zero = true; + int flags = MEM_RESERVE; + if (commit) { flags |= MEM_COMMIT; } + *addr = win_virtual_alloc(hint_addr, size, try_alignment, flags, false, allow_large, is_large); + return (*addr != NULL ? 0 : (int)GetLastError()); +} + + +//--------------------------------------------- +// Commit/Reset/Protect +//--------------------------------------------- +#ifdef _MSC_VER +#pragma warning(disable:6250) // suppress warning calling VirtualFree without MEM_RELEASE (for decommit) +#endif + +int _mi_prim_commit(void* addr, size_t size, bool* is_zero) { + *is_zero = false; + /* + // zero'ing only happens on an initial commit... but checking upfront seems expensive.. 
+ _MEMORY_BASIC_INFORMATION meminfo; _mi_memzero_var(meminfo); + if (VirtualQuery(addr, &meminfo, size) > 0) { + if ((meminfo.State & MEM_COMMIT) == 0) { + *is_zero = true; + } + } + */ + // commit + void* p = VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE); + if (p == NULL) return (int)GetLastError(); + return 0; +} + +int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) { + BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT); + *needs_recommit = true; // for safety, assume always decommitted even in the case of an error. + return (ok ? 0 : (int)GetLastError()); +} + +int _mi_prim_reset(void* addr, size_t size) { + void* p = VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); + mi_assert_internal(p == addr); + #if 0 + if (p != NULL) { + VirtualUnlock(addr,size); // VirtualUnlock after MEM_RESET removes the memory directly from the working set + } + #endif + return (p != NULL ? 0 : (int)GetLastError()); +} + +int _mi_prim_reuse(void* addr, size_t size) { + MI_UNUSED(addr); MI_UNUSED(size); + return 0; +} + +int _mi_prim_protect(void* addr, size_t size, bool protect) { + DWORD oldprotect = 0; + BOOL ok = VirtualProtect(addr, size, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect); + return (ok ? 0 : (int)GetLastError()); +} + + +//--------------------------------------------- +// Huge page allocation +//--------------------------------------------- + +static void* _mi_prim_alloc_huge_os_pagesx(void* hint_addr, size_t size, int numa_node) +{ + const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; + + win_enable_large_os_pages(NULL); + + MI_MEM_EXTENDED_PARAMETER params[3] = { {{0,0},{0}},{{0,0},{0}},{{0,0},{0}} }; + // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages + static bool mi_huge_pages_available = true; + if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { + params[0].Type.Type = MiMemExtendedParameterAttributeFlags; + params[0].Arg.ULong64 = MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; + ULONG param_count = 1; + if (numa_node >= 0) { + param_count++; + params[1].Type.Type = MiMemExtendedParameterNumaNode; + params[1].Arg.ULong = (unsigned)numa_node; + } + SIZE_T psize = size; + void* base = hint_addr; + LONG err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); + if (err == 0 && base != NULL) { + return base; + } + else { + // fall back to regular large pages + mi_huge_pages_available = false; // don't try further huge pages + _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err); + } + } + // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation + if (pVirtualAlloc2 != NULL && numa_node >= 0) { + params[0].Type.Type = MiMemExtendedParameterNumaNode; + params[0].Arg.ULong = (unsigned)numa_node; + return (*pVirtualAlloc2)(GetCurrentProcess(), hint_addr, size, flags, PAGE_READWRITE, params, 1); + } + + // otherwise use regular virtual alloc on older windows + return VirtualAlloc(hint_addr, size, flags, PAGE_READWRITE); +} + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { + *is_zero = true; + *addr = _mi_prim_alloc_huge_os_pagesx(hint_addr,size,numa_node); + return (*addr != NULL ? 
0 : (int)GetLastError()); +} + + +//--------------------------------------------- +// Numa nodes +//--------------------------------------------- + +size_t _mi_prim_numa_node(void) { + USHORT numa_node = 0; + if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) { + // Extended API is supported + MI_PROCESSOR_NUMBER pnum; + (*pGetCurrentProcessorNumberEx)(&pnum); + USHORT nnode = 0; + BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode); + if (ok) { numa_node = nnode; } + } + else if (pGetNumaProcessorNode != NULL) { + // Vista or earlier, use older API that is limited to 64 processors. Issue #277 + DWORD pnum = GetCurrentProcessorNumber(); + UCHAR nnode = 0; + BOOL ok = pGetNumaProcessorNode((UCHAR)pnum, &nnode); + if (ok) { numa_node = nnode; } + } + return numa_node; +} + +size_t _mi_prim_numa_node_count(void) { + ULONG numa_max = 0; + if (pGetNumaHighestNodeNumber!=NULL) { + (*pGetNumaHighestNodeNumber)(&numa_max); + } + // find the highest node number that has actual processors assigned to it. Issue #282 + while (numa_max > 0) { + if (pGetNumaNodeProcessorMaskEx != NULL) { + // Extended API is supported + GROUP_AFFINITY affinity; + if ((*pGetNumaNodeProcessorMaskEx)((USHORT)numa_max, &affinity)) { + if (affinity.Mask != 0) break; // found the maximum non-empty node + } + } + else { + // Vista or earlier, use older API that is limited to 64 processors. + ULONGLONG mask; + if (pGetNumaNodeProcessorMask != NULL) { + if ((*pGetNumaNodeProcessorMask)((UCHAR)numa_max, &mask)) { + if (mask != 0) break; // found the maximum non-empty node + } + }; + } + // max node was invalid or had no processor assigned, try again + numa_max--; + } + return ((size_t)numa_max + 1); +} + + +//---------------------------------------------------------------- +// Clock +//---------------------------------------------------------------- + +static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) { + static LARGE_INTEGER mfreq; // = 0 + if (mfreq.QuadPart == 0LL) { + LARGE_INTEGER f; + QueryPerformanceFrequency(&f); + mfreq.QuadPart = f.QuadPart/1000LL; + if (mfreq.QuadPart == 0) mfreq.QuadPart = 1; + } + return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart); +} + +mi_msecs_t _mi_prim_clock_now(void) { + LARGE_INTEGER t; + QueryPerformanceCounter(&t); + return mi_to_msecs(t); +} + + +//---------------------------------------------------------------- +// Process Info +//---------------------------------------------------------------- + +#include <psapi.h> + +static mi_msecs_t filetime_msecs(const FILETIME* ftime) { + ULARGE_INTEGER i; + i.LowPart = ftime->dwLowDateTime; + i.HighPart = ftime->dwHighDateTime; + mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds + return msecs; +} + +typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD); +static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL; + +void _mi_prim_process_info(mi_process_info_t* pinfo) +{ + FILETIME ct; + FILETIME ut; + FILETIME st; + FILETIME et; + GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); + pinfo->utime = filetime_msecs(&ut); + pinfo->stime = filetime_msecs(&st); + + // load psapi on demand + if (pGetProcessMemoryInfo == NULL) { + HINSTANCE hDll = LoadLibrary(TEXT("psapi.dll")); + if (hDll != NULL) { + pGetProcessMemoryInfo = (PGetProcessMemoryInfo)(void (*)(void))GetProcAddress(hDll, "GetProcessMemoryInfo"); + } + } + + // get process info + PROCESS_MEMORY_COUNTERS info; _mi_memzero_var(info); + if (pGetProcessMemoryInfo != NULL) { + pGetProcessMemoryInfo(GetCurrentProcess(), 
&info, sizeof(info)); + } + pinfo->current_rss = (size_t)info.WorkingSetSize; + pinfo->peak_rss = (size_t)info.PeakWorkingSetSize; + pinfo->current_commit = (size_t)info.PagefileUsage; + pinfo->peak_commit = (size_t)info.PeakPagefileUsage; + pinfo->page_faults = (size_t)info.PageFaultCount; +} + +//---------------------------------------------------------------- +// Output +//---------------------------------------------------------------- + +void _mi_prim_out_stderr( const char* msg ) +{ + // on windows with redirection, the C runtime cannot handle locale dependent output + // after the main thread closes so we use direct console output. + if (!_mi_preloading()) { + // _cputs(msg); // _cputs cannot be used as it aborts when failing to lock the console + static HANDLE hcon = INVALID_HANDLE_VALUE; + static bool hconIsConsole = false; + if (hcon == INVALID_HANDLE_VALUE) { + hcon = GetStdHandle(STD_ERROR_HANDLE); + #ifdef MI_HAS_CONSOLE_IO + CONSOLE_SCREEN_BUFFER_INFO sbi; + hconIsConsole = ((hcon != INVALID_HANDLE_VALUE) && GetConsoleScreenBufferInfo(hcon, &sbi)); + #endif + } + const size_t len = _mi_strlen(msg); + if (len > 0 && len < UINT32_MAX) { + DWORD written = 0; + if (hconIsConsole) { + #ifdef MI_HAS_CONSOLE_IO + WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL); + #endif + } + else if (hcon != INVALID_HANDLE_VALUE) { + // use direct write if stderr was redirected + WriteFile(hcon, msg, (DWORD)len, &written, NULL); + } + else { + // finally fall back to fputs after all + fputs(msg, stderr); + } + } + } +} + + +//---------------------------------------------------------------- +// Environment +//---------------------------------------------------------------- + +// On Windows use GetEnvironmentVariable instead of getenv to work +// reliably even when this is invoked before the C runtime is initialized. +// i.e. when `_mi_preloading() == true`. +// Note: on windows, environment names are not case sensitive. +bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { + result[0] = 0; + size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); + return (len > 0 && len < result_size); +} + + +//---------------------------------------------------------------- +// Random +//---------------------------------------------------------------- + +#if defined(MI_USE_RTLGENRANDOM) // || defined(__cplusplus) +// We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using +// dynamic overriding, we observed it can raise an exception when compiled with C++, and +// sometimes deadlocks when also running under the VS debugger. +// In contrast, issue #623 implies that on Windows Server 2019 we need to use BCryptGenRandom. +// To be continued.. 
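+// (Editor's illustrative note, not part of upstream mimalloc 3.2.6: src/random.c in this same
+// patch consumes this primitive roughly as
+//    uint8_t key[32];
+//    if (!_mi_prim_random_buf(key, sizeof(key))) {
+//      // no secure OS source available; fall back to _mi_os_random_weak (timer + ASLR)
+//    }
+// so the implementations below either fill `buf` completely or return false.)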
+#pragma comment (lib,"advapi32.lib") +#define RtlGenRandom SystemFunction036 +mi_decl_externc BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength); + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + return (RtlGenRandom(buf, (ULONG)buf_len) != 0); +} + +#else + +#ifndef BCRYPT_USE_SYSTEM_PREFERRED_RNG +#define BCRYPT_USE_SYSTEM_PREFERRED_RNG 0x00000002 +#endif + +typedef LONG (NTAPI *PBCryptGenRandom)(HANDLE, PUCHAR, ULONG, ULONG); +static PBCryptGenRandom pBCryptGenRandom = NULL; + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + if (pBCryptGenRandom == NULL) { + HINSTANCE hDll = LoadLibrary(TEXT("bcrypt.dll")); + if (hDll != NULL) { + pBCryptGenRandom = (PBCryptGenRandom)(void (*)(void))GetProcAddress(hDll, "BCryptGenRandom"); + } + if (pBCryptGenRandom == NULL) return false; + } + return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); +} + +#endif // MI_USE_RTLGENRANDOM + + +//---------------------------------------------------------------- +// Thread pool? +//---------------------------------------------------------------- + +bool _mi_prim_thread_is_in_threadpool(void) { +#if (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64) + if (win_major_version >= 6) { + // check if this thread belongs to a windows threadpool + // see: + struct _TEB* const teb = NtCurrentTeb(); + void* const pool_data = *((void**)((uint8_t*)teb + (MI_SIZE_BITS == 32 ? 0x0F90 : 0x1778))); + return (pool_data != NULL); + } +#endif + return false; +} + + +//---------------------------------------------------------------- +// Process & Thread Init/Done +//---------------------------------------------------------------- + +#if MI_WIN_USE_FIXED_TLS==1 +mi_decl_cache_align size_t _mi_win_tls_offset = 0; +#endif + +//static void mi_debug_out(const char* s) { +// HANDLE h = GetStdHandle(STD_ERROR_HANDLE); +// WriteConsole(h, s, (DWORD)_mi_strlen(s), NULL, NULL); +//} + +static void mi_win_tls_init(DWORD reason) { + if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) { + #if MI_WIN_USE_FIXED_TLS==1 // we must allocate a TLS slot dynamically + if (_mi_win_tls_offset == 0 && reason == DLL_PROCESS_ATTACH) { + const DWORD tls_slot = TlsAlloc(); // usually returns slot 1 + if (tls_slot == TLS_OUT_OF_INDEXES) { + _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); + } + _mi_win_tls_offset = (size_t)tls_slot * sizeof(void*); + } + #endif + #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation + if (_mi_theap_default() == NULL) { + _mi_theap_default_set((mi_theap_t*)&_mi_theap_empty); + #if MI_DEBUG && MI_WIN_USE_FIXED_TLS==1 + void* const p = TlsGetValue((DWORD)(_mi_win_tls_offset / sizeof(void*))); + mi_assert_internal(p == (void*)&_mi_theap_empty); + #endif + } + #endif + } +} + +static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { + MI_UNUSED(reserved); + MI_UNUSED(module); + mi_win_tls_init(reason); + if (reason==DLL_PROCESS_ATTACH) { + _mi_auto_process_init(); + } + else if (reason==DLL_PROCESS_DETACH) { + _mi_auto_process_done(); + } + else if (reason==DLL_THREAD_DETACH && !_mi_is_redirected()) { + _mi_thread_done(NULL); + } +} + + +#if defined(MI_SHARED_LIB) + #define MI_PRIM_HAS_PROCESS_ATTACH 1 + + // Windows DLL: easy to hook into process_init and thread_done + BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { + mi_win_main((PVOID)inst,reason,reserved); + return TRUE; + } + + // nothing to do since 
`_mi_thread_done` is handled through the DLL_THREAD_DETACH event. + void _mi_prim_thread_init_auto_done(void) { } + void _mi_prim_thread_done_auto_done(void) { } + void _mi_prim_thread_associate_default_theap(mi_theap_t* theap) { + MI_UNUSED(theap); + } + +#elif !defined(MI_WIN_USE_FLS) + #define MI_PRIM_HAS_PROCESS_ATTACH 1 + + static void NTAPI mi_win_main_attach(PVOID module, DWORD reason, LPVOID reserved) { + if (reason == DLL_PROCESS_ATTACH || reason == DLL_THREAD_ATTACH) { + mi_win_main(module, reason, reserved); + } + } + static void NTAPI mi_win_main_detach(PVOID module, DWORD reason, LPVOID reserved) { + if (reason == DLL_PROCESS_DETACH || reason == DLL_THREAD_DETACH) { + mi_win_main(module, reason, reserved); + } + } + + // Set up TLS callbacks in a statically linked library by using special data sections. + // See + // We use 2 entries to ensure we call attach events before constructors + // are called, and detach events after destructors are called. + #if defined(__cplusplus) + extern "C" { + #endif + + #if defined(_WIN64) + #pragma comment(linker, "/INCLUDE:_tls_used") + #pragma comment(linker, "/INCLUDE:_mi_tls_callback_pre") + #pragma comment(linker, "/INCLUDE:_mi_tls_callback_post") + #pragma const_seg(".CRT$XLB") + extern const PIMAGE_TLS_CALLBACK _mi_tls_callback_pre[]; + const PIMAGE_TLS_CALLBACK _mi_tls_callback_pre[] = { &mi_win_main_attach }; + #pragma const_seg() + #pragma const_seg(".CRT$XLY") + extern const PIMAGE_TLS_CALLBACK _mi_tls_callback_post[]; + const PIMAGE_TLS_CALLBACK _mi_tls_callback_post[] = { &mi_win_main_detach }; + #pragma const_seg() + #else + #pragma comment(linker, "/INCLUDE:__tls_used") + #pragma comment(linker, "/INCLUDE:__mi_tls_callback_pre") + #pragma comment(linker, "/INCLUDE:__mi_tls_callback_post") + #pragma data_seg(".CRT$XLB") + PIMAGE_TLS_CALLBACK _mi_tls_callback_pre[] = { &mi_win_main_attach }; + #pragma data_seg() + #pragma data_seg(".CRT$XLY") + PIMAGE_TLS_CALLBACK _mi_tls_callback_post[] = { &mi_win_main_detach }; + #pragma data_seg() + #endif + + #if defined(__cplusplus) + } + #endif + + // nothing to do since `_mi_thread_done` is handled through the DLL_THREAD_DETACH event. + void _mi_prim_thread_init_auto_done(void) { } + void _mi_prim_thread_done_auto_done(void) { } + void _mi_prim_thread_associate_default_theap(mi_theap_t* theap) { + MI_UNUSED(theap); + } + +#else // deprecated: statically linked, use fiber api + + #if defined(_MSC_VER) // on clang/gcc use the constructor attribute (in `src/prim/prim.c`) + // MSVC: use data section magic for static libraries + // See + #define MI_PRIM_HAS_PROCESS_ATTACH 1 + + static int mi_process_attach(void) { + mi_win_main(NULL,DLL_PROCESS_ATTACH,NULL); + atexit(&_mi_auto_process_done); + return 0; + } + typedef int(*mi_crt_callback_t)(void); + #if defined(_WIN64) + #pragma comment(linker, "/INCLUDE:_mi_tls_callback") + #pragma section(".CRT$XIU", long, read) + #else + #pragma comment(linker, "/INCLUDE:__mi_tls_callback") + #endif + #pragma data_seg(".CRT$XIU") + mi_decl_externc mi_crt_callback_t _mi_tls_callback[] = { &mi_process_attach }; + #pragma data_seg() + #endif + + // use the fiber api for calling `_mi_thread_done`. 
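+  // (Editor's illustrative note, not part of upstream mimalloc 3.2.6: the fiber-local-storage
+  // flow implemented below is, in outline:
+  //    mi_fls_key = FlsAlloc(&mi_fls_done);   // once, in _mi_prim_thread_init_auto_done
+  //    FlsSetValue(mi_fls_key, theap);        // per thread, associate its default theap
+  //    // on thread exit Windows invokes mi_fls_done(theap), which calls _mi_thread_done
+  // so per-thread cleanup still runs even without DLL_THREAD_DETACH notifications.)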
+ #include <fibersapi.h> + #if (_WIN32_WINNT < 0x600) // before Windows Vista + WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback ); + WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex ); + WINBASEAPI BOOL WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData ); + WINBASEAPI BOOL WINAPI FlsFree(_In_ DWORD dwFlsIndex); + #endif + + static DWORD mi_fls_key = (DWORD)(-1); + + static void NTAPI mi_fls_done(PVOID value) { + mi_theap_t* theap = (mi_theap_t*)value; + if (theap != NULL) { + _mi_thread_done(theap); + FlsSetValue(mi_fls_key, NULL); // prevent recursion as _mi_thread_done may set it back to the main theap, issue #672 + } + } + + void _mi_prim_thread_init_auto_done(void) { + mi_fls_key = FlsAlloc(&mi_fls_done); + } + + void _mi_prim_thread_done_auto_done(void) { + // call thread-done on all threads (except the main thread) to prevent + // dangling callback pointer if statically linked with a DLL; Issue #208 + FlsFree(mi_fls_key); + } + + void _mi_prim_thread_associate_default_theap(mi_theap_t* theap) { + mi_assert_internal(mi_fls_key != (DWORD)(-1)); + FlsSetValue(mi_fls_key, theap); + } +#endif + +// ---------------------------------------------------- +// Communicate with the redirection module on Windows +// ---------------------------------------------------- +#if defined(MI_SHARED_LIB) && !defined(MI_WIN_NOREDIRECT) + #define MI_PRIM_HAS_ALLOCATOR_INIT 1 + + static bool mi_redirected = false; // true if malloc redirects to mi_malloc + + bool _mi_is_redirected(void) { + return mi_redirected; + } + + #ifdef __cplusplus + extern "C" { + #endif + mi_decl_export void _mi_redirect_entry(DWORD reason) { + // called on redirection; careful as this may be called before DllMain + mi_win_tls_init(reason); + if (reason == DLL_PROCESS_ATTACH) { + mi_redirected = true; + } + else if (reason == DLL_PROCESS_DETACH) { + mi_redirected = false; + } + else if (reason == DLL_THREAD_DETACH) { + _mi_thread_done(NULL); + } + } + __declspec(dllimport) bool mi_cdecl mi_allocator_init(const char** message); + __declspec(dllimport) void mi_cdecl mi_allocator_done(void); + #ifdef __cplusplus + } + #endif + bool _mi_allocator_init(const char** message) { + return mi_allocator_init(message); + } + void _mi_allocator_done(void) { + mi_allocator_done(); + } +#endif + diff --git a/3rd/mimalloc-3.2.6/src/prim/windows/readme.md b/3rd/mimalloc-3.2.6/src/prim/windows/readme.md new file mode 100644 index 00000000..217c3d17 --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/prim/windows/readme.md @@ -0,0 +1,17 @@ +## Primitives: + +- `prim.c` contains Windows primitives for OS allocation. + +## Event Tracing for Windows (ETW) + +- `etw.h` is generated from `etw.man` which contains the manifest for mimalloc events. + (100 is an allocation, 101 is for a free) + +- `etw-mimalloc.wprp` is a profile for the Windows Performance Recorder (WPR). + In an admin prompt, you can use: + ``` + > wpr -start src\prim\windows\etw-mimalloc.wprp -filemode + > <run your program> + > wpr -stop test.etl + ``` + and then open `test.etl` in the Windows Performance Analyzer (WPA). \ No newline at end of file diff --git a/3rd/mimalloc-2.0.9/src/random.c b/3rd/mimalloc-3.2.6/src/random.c similarity index 58% rename from 3rd/mimalloc-2.0.9/src/random.c rename to 3rd/mimalloc-3.2.6/src/random.c index 06d4ba4a..990e4894 100644 --- a/3rd/mimalloc-2.0.9/src/random.c +++ b/3rd/mimalloc-3.2.6/src/random.c @@ -4,14 +4,9 @@ This is free software; you can redistribute it and/or modify it under the terms of the MIT license. 
A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ -#ifndef _DEFAULT_SOURCE -#define _DEFAULT_SOURCE // for syscall() on Linux -#endif - #include "mimalloc.h" -#include "mimalloc-internal.h" - -#include // memset +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" // _mi_prim_random_buf /* ---------------------------------------------------------------------------- We use our own PRNG to keep predictable performance of random number generation @@ -37,15 +32,11 @@ The implementation uses regular C code which compiles very well on modern compil (gcc x64 has no register spills, and clang 6+ uses SSE instructions) -----------------------------------------------------------------------------*/ -static inline uint32_t rotl(uint32_t x, uint32_t shift) { - return (x << shift) | (x >> (32 - shift)); -} - static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d) { - x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 16); - x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 12); - x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 8); - x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7); + x[a] += x[b]; x[d] = mi_rotl32(x[d] ^ x[a], 16); + x[c] += x[d]; x[b] = mi_rotl32(x[b] ^ x[c], 12); + x[a] += x[b]; x[d] = mi_rotl32(x[d] ^ x[a], 8); + x[c] += x[d]; x[b] = mi_rotl32(x[b] ^ x[c], 7); } static void chacha_block(mi_random_ctx_t* ctx) @@ -103,7 +94,7 @@ static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t no // since we only use chacha for randomness (and not encryption) we // do not _need_ to read 32-bit values as little endian but we do anyways // just for being compatible :-) - memset(ctx, 0, sizeof(*ctx)); + _mi_memzero(ctx, sizeof(*ctx)); for (size_t i = 0; i < 4; i++) { const uint8_t* sigma = (uint8_t*)"expand 32-byte k"; ctx->input[i] = read32(sigma,i); @@ -118,7 +109,7 @@ static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t no } static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) { - memset(ctx_new, 0, sizeof(*ctx_new)); + _mi_memzero(ctx_new, sizeof(*ctx_new)); _mi_memcpy(ctx_new->input, ctx->input, sizeof(ctx_new->input)); ctx_new->input[12] = 0; ctx_new->input[13] = 0; @@ -147,173 +138,31 @@ void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) { uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { mi_assert_internal(mi_random_is_initialized(ctx)); - #if MI_INTPTR_SIZE <= 4 - return chacha_next32(ctx); - #elif MI_INTPTR_SIZE == 8 - return (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx)); - #else - # error "define mi_random_next for this platform" - #endif + uintptr_t r; + do { + #if MI_INTPTR_SIZE <= 4 + r = chacha_next32(ctx); + #elif MI_INTPTR_SIZE == 8 + r = (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx)); + #else + # error "define mi_random_next for this platform" + #endif + } while (r==0); + return r; } /* ---------------------------------------------------------------------------- -To initialize a fresh random context we rely on the OS: -- Windows : BCryptGenRandom (or RtlGenRandom) -- macOS : CCRandomGenerateBytes, arc4random_buf -- bsd,wasi : arc4random_buf -- Linux : getrandom,/dev/urandom +To initialize a fresh random context. If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR. 
-----------------------------------------------------------------------------*/ -#if defined(_WIN32) - -#if defined(MI_USE_RTLGENRANDOM) // || defined(__cplusplus) -// We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using -// dynamic overriding, we observed it can raise an exception when compiled with C++, and -// sometimes deadlocks when also running under the VS debugger. -// In contrast, issue #623 implies that on Windows Server 2019 we need to use BCryptGenRandom. -// To be continued.. -#pragma comment (lib,"advapi32.lib") -#define RtlGenRandom SystemFunction036 -#ifdef __cplusplus -extern "C" { -#endif -BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength); -#ifdef __cplusplus -} -#endif -static bool os_random_buf(void* buf, size_t buf_len) { - return (RtlGenRandom(buf, (ULONG)buf_len) != 0); -} -#else - -#ifndef BCRYPT_USE_SYSTEM_PREFERRED_RNG -#define BCRYPT_USE_SYSTEM_PREFERRED_RNG 0x00000002 -#endif - -typedef LONG (NTAPI *PBCryptGenRandom)(HANDLE, PUCHAR, ULONG, ULONG); -static PBCryptGenRandom pBCryptGenRandom = NULL; - -static bool os_random_buf(void* buf, size_t buf_len) { - if (pBCryptGenRandom == NULL) { - HINSTANCE hDll = LoadLibrary(TEXT("bcrypt.dll")); - if (hDll != NULL) { - pBCryptGenRandom = (PBCryptGenRandom)(void (*)(void))GetProcAddress(hDll, "BCryptGenRandom"); - } - } - if (pBCryptGenRandom == NULL) { - return false; - } - else { - return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); - } -} -#endif - -#elif defined(__APPLE__) -#include -#if defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10 -#include -#include -#endif -static bool os_random_buf(void* buf, size_t buf_len) { - #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 - // We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf - // may fail silently on macOS. See PR #390, and - return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess); - #else - // fall back on older macOS - arc4random_buf(buf, buf_len); - return true; - #endif -} - -#elif defined(__ANDROID__) || defined(__DragonFly__) || \ - defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ - defined(__sun) // todo: what to use with __wasi__? -#include -static bool os_random_buf(void* buf, size_t buf_len) { - arc4random_buf(buf, buf_len); - return true; -} -#elif defined(__linux__) || defined(__HAIKU__) -#if defined(__linux__) -#include -#endif -#include -#include -#include -#include -#include -static bool os_random_buf(void* buf, size_t buf_len) { - // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h` - // and for the latter the actual `getrandom` call is not always defined. - // (see ) - // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed. 
-#ifdef SYS_getrandom - #ifndef GRND_NONBLOCK - #define GRND_NONBLOCK (1) - #endif - static _Atomic(uintptr_t) no_getrandom; // = 0 - if (mi_atomic_load_acquire(&no_getrandom)==0) { - ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); - if (ret >= 0) return (buf_len == (size_t)ret); - if (errno != ENOSYS) return false; - mi_atomic_store_release(&no_getrandom, 1UL); // don't call again, and fall back to /dev/urandom - } -#endif - int flags = O_RDONLY; - #if defined(O_CLOEXEC) - flags |= O_CLOEXEC; - #endif - int fd = open("/dev/urandom", flags, 0); - if (fd < 0) return false; - size_t count = 0; - while(count < buf_len) { - ssize_t ret = read(fd, (char*)buf + count, buf_len - count); - if (ret<=0) { - if (errno!=EAGAIN && errno!=EINTR) break; - } - else { - count += ret; - } - } - close(fd); - return (count==buf_len); -} -#else -static bool os_random_buf(void* buf, size_t buf_len) { - return false; -} -#endif - -#if defined(_WIN32) -#include -#elif defined(__APPLE__) -#include -#else -#include -#endif - uintptr_t _mi_os_random_weak(uintptr_t extra_seed) { uintptr_t x = (uintptr_t)&_mi_os_random_weak ^ extra_seed; // ASLR makes the address random - - #if defined(_WIN32) - LARGE_INTEGER pcount; - QueryPerformanceCounter(&pcount); - x ^= (uintptr_t)(pcount.QuadPart); - #elif defined(__APPLE__) - x ^= (uintptr_t)mach_absolute_time(); - #else - struct timespec time; - clock_gettime(CLOCK_MONOTONIC, &time); - x ^= (uintptr_t)time.tv_sec; - x ^= (uintptr_t)time.tv_nsec; - #endif + x ^= _mi_prim_clock_now(); // and do a few randomization steps uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; - for (uintptr_t i = 0; i < max; i++) { + for (uintptr_t i = 0; i < max || x==0; i++, x++) { x = _mi_random_shuffle(x); } mi_assert_internal(x != 0); @@ -322,14 +171,14 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed) { static void mi_random_init_ex(mi_random_ctx_t* ctx, bool use_weak) { uint8_t key[32]; - if (use_weak || !os_random_buf(key, sizeof(key))) { + if (use_weak || !_mi_prim_random_buf(key, sizeof(key))) { // if we fail to get random data from the OS, we fall back to a // weak random source based on the current time #if !defined(__wasi__) if (!use_weak) { _mi_warning_message("unable to use secure randomness\n"); } #endif uintptr_t x = _mi_os_random_weak(0); - for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. + for (size_t i = 0; i < 8; i++, x++) { // key is eight 32-bit words. x = _mi_random_shuffle(x); ((uint32_t*)key)[i] = (uint32_t)x; } diff --git a/3rd/mimalloc-2.0.9/src/static.c b/3rd/mimalloc-3.2.6/src/static.c similarity index 75% rename from 3rd/mimalloc-2.0.9/src/static.c rename to 3rd/mimalloc-3.2.6/src/static.c index 5b34ddbb..2383f659 100644 --- a/3rd/mimalloc-2.0.9/src/static.c +++ b/3rd/mimalloc-3.2.6/src/static.c @@ -14,26 +14,30 @@ terms of the MIT license. A copy of the license can be found in the file #endif #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" // For a static override we create a single object file // containing the whole library. If it is linked first // it will override all the standard library allocation // functions (on Unix's). 
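+// (Editor's illustrative note, not part of upstream mimalloc 3.2.6: one hedged way to use this
+// single-translation-unit build is to compile `static.c` directly into the final program so its
+// allocation symbols are resolved first, for example:
+//    cc -O2 -I 3rd/mimalloc-3.2.6/include myprogram.c 3rd/mimalloc-3.2.6/src/static.c -lpthread
+// the exact include path and link flags depend on the project and platform.)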
-#include "stats.c" -#include "random.c" -#include "os.c" -#include "bitmap.c" -#include "arena.c" -#include "segment-cache.c" -#include "segment.c" -#include "page.c" -#include "heap.c" -#include "alloc.c" +#include "alloc.c" // includes alloc-override.c and free.c #include "alloc-aligned.c" #include "alloc-posix.c" -#if MI_OSX_ZONE -#include "alloc-override-osx.c" -#endif +#include "arena.c" +#include "arena-meta.c" +#include "bitmap.c" +#include "heap.c" #include "init.c" +#include "libc.c" #include "options.c" +#include "os.c" +#include "page.c" // includes page-queue.c +#include "page-map.c" +#include "random.c" +#include "stats.c" +#include "theap.c" +#include "threadlocal.c" +#include "prim/prim.c" +#if MI_OSX_ZONE +#include "prim/osx/alloc-override-zone.c" +#endif diff --git a/3rd/mimalloc-3.2.6/src/stats.c b/3rd/mimalloc-3.2.6/src/stats.c new file mode 100644 index 00000000..84b8ad7e --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/stats.c @@ -0,0 +1,790 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-stats.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" + +#include <string.h> // memset + +#if defined(_MSC_VER) && (_MSC_VER < 1920) +#pragma warning(disable:4204) // non-constant aggregate initializer +#endif + +/* ----------------------------------------------------------- + Statistics operations +----------------------------------------------------------- */ + +static void mi_stat_update_mt(mi_stat_count_t* stat, int64_t amount) { + if (amount == 0) return; + // add atomically + int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount); + mi_atomic_maxi64_relaxed(&stat->peak, current + amount); + if (amount > 0) { + mi_atomic_addi64_relaxed(&stat->total, amount); + } +} + +static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { + if (amount == 0) return; + // add thread local + stat->current += amount; + if (stat->current > stat->peak) { stat->peak = stat->current; } + if (amount > 0) { stat->total += amount; } +} + + +void __mi_stat_counter_increase_mt(mi_stat_counter_t* stat, size_t amount) { + mi_atomic_addi64_relaxed(&stat->total, (int64_t)amount); +} + +void __mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { + stat->total += amount; +} + +void __mi_stat_increase_mt(mi_stat_count_t* stat, size_t amount) { + mi_stat_update_mt(stat, (int64_t)amount); +} +void __mi_stat_increase(mi_stat_count_t* stat, size_t amount) { + mi_stat_update(stat, (int64_t)amount); +} + +void __mi_stat_decrease_mt(mi_stat_count_t* stat, size_t amount) { + mi_stat_update_mt(stat, -((int64_t)amount)); +} +void __mi_stat_decrease(mi_stat_count_t* stat, size_t amount) { + mi_stat_update(stat, -((int64_t)amount)); +} + + +// Adjust stats to compensate; for example before committing a range, +// first adjust downwards with parts that were already committed so +// we avoid double counting. 
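+// (Editor's illustrative note, not part of upstream mimalloc 3.2.6: a worked example of the
+// compensation described above. If 16 KiB is about to be committed but 4 KiB of that range was
+// already counted, a caller can first do
+//    __mi_stat_adjust_decrease(&stats->committed, 4*MI_KiB);
+//    __mi_stat_increase(&stats->committed, 16*MI_KiB);
+// so `current`/`total` grow by a net 12 KiB and the overlap is not counted twice; the field name
+// `committed` is used here only for illustration.)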
+static void mi_stat_adjust_mt(mi_stat_count_t* stat, int64_t amount) { + if (amount == 0) return; + // adjust atomically + mi_atomic_addi64_relaxed(&stat->current, amount); + mi_atomic_addi64_relaxed(&stat->total, amount); +} + +static void mi_stat_adjust(mi_stat_count_t* stat, int64_t amount) { + if (amount == 0) return; + stat->current += amount; + stat->total += amount; +} + +void __mi_stat_adjust_increase_mt(mi_stat_count_t* stat, size_t amount) { + mi_stat_adjust_mt(stat, (int64_t)amount); +} +void __mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount) { + mi_stat_adjust(stat, (int64_t)amount); +} +void __mi_stat_adjust_decrease_mt(mi_stat_count_t* stat, size_t amount) { + mi_stat_adjust_mt(stat, -((int64_t)amount)); +} +void __mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount) { + mi_stat_adjust(stat, -((int64_t)amount)); +} + + +// must be thread safe as it is called from stats_merge +static void mi_stat_count_add_mt(mi_stat_count_t* stat, const mi_stat_count_t* src) { + if (stat==src) return; + mi_atomic_void_addi64_relaxed(&stat->total, &src->total); + const int64_t prev_current = mi_atomic_addi64_relaxed(&stat->current, src->current); + + // Global current plus thread peak approximates new global peak + // note: peak scores do really not work across threads. + // we used to just add them together but that often overestimates in practice. + // similarly, max does not seem to work well. The current approach + // by Artem Kharytoniuk (@artem-lunarg) seems to work better, see PR#1112 + // for a longer description. + mi_atomic_maxi64_relaxed(&stat->peak, prev_current + src->peak); +} + +static void mi_stat_counter_add_mt(mi_stat_counter_t* stat, const mi_stat_counter_t* src) { + if (stat==src) return; + mi_atomic_void_addi64_relaxed(&stat->total, &src->total); +} + +#define MI_STAT_COUNT(stat) mi_stat_count_add_mt(&stats->stat, &src->stat); +#define MI_STAT_COUNTER(stat) mi_stat_counter_add_mt(&stats->stat, &src->stat); + +// must be thread safe as it is called from stats_merge +static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { + if (stats==src) return; + + // copy all fields + MI_STAT_FIELDS() + + #if MI_STAT>1 + for (size_t i = 0; i <= MI_BIN_HUGE; i++) { + mi_stat_count_add_mt(&stats->malloc_bins[i], &src->malloc_bins[i]); + } + #endif + for (size_t i = 0; i <= MI_BIN_HUGE; i++) { + mi_stat_count_add_mt(&stats->page_bins[i], &src->page_bins[i]); + } +} + +#undef MI_STAT_COUNT +#undef MI_STAT_COUNTER + +/* ----------------------------------------------------------- + Display statistics +----------------------------------------------------------- */ + +// unit > 0 : size in binary bytes +// unit == 0: count as decimal +// unit < 0 : count in binary +static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg, const char* fmt) { + char buf[32]; _mi_memzero_var(buf); + int len = 32; + const char* suffix = (unit <= 0 ? " " : "B"); + const int64_t base = (unit == 0 ? 1000 : 1024); + if (unit>0) n *= unit; + + const int64_t pos = (n < 0 ? -n : n); + if (pos < base) { + if (n!=1 || suffix[0] != 'B') { // skip printing 1 B for the unit column + _mi_snprintf(buf, len, "%lld %-3s", (long long)n, (n==0 ? 
"" : suffix)); + } + } + else { + int64_t divider = base; + const char* magnitude = "K"; + if (pos >= divider*base) { divider *= base; magnitude = "M"; } + if (pos >= divider*base) { divider *= base; magnitude = "G"; } + const int64_t tens = (n / (divider/10)); + const long whole = (long)(tens/10); + const long frac1 = (long)(tens%10); + char unitdesc[8]; + _mi_snprintf(unitdesc, 8, "%s%s%s", magnitude, (base==1024 ? "i" : ""), suffix); + _mi_snprintf(buf, len, "%ld.%ld %-3s", whole, (frac1 < 0 ? -frac1 : frac1), unitdesc); + } + _mi_fprintf(out, arg, (fmt==NULL ? "%12s" : fmt), buf); +} + + +static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg) { + mi_printf_amount(n,unit,out,arg,NULL); +} + +static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out, void* arg) { + if (unit==1) _mi_fprintf(out, arg, "%12s"," "); + else mi_print_amount(n,0,out,arg); +} + +static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg, const char* notok ) { + _mi_fprintf(out, arg," %-10s:", msg); + if (unit != 0) { + if (unit > 0) { + mi_print_amount(stat->peak, unit, out, arg); + mi_print_amount(stat->total, unit, out, arg); + // mi_print_amount(stat->freed, unit, out, arg); + mi_print_amount(stat->current, unit, out, arg); + mi_print_amount(unit, 1, out, arg); + mi_print_count(stat->total, unit, out, arg); + } + else { + mi_print_amount(stat->peak, -1, out, arg); + mi_print_amount(stat->total, -1, out, arg); + // mi_print_amount(stat->freed, -1, out, arg); + mi_print_amount(stat->current, -1, out, arg); + if (unit == -1) { + _mi_fprintf(out, arg, "%24s", ""); + } + else { + mi_print_amount(-unit, 1, out, arg); + mi_print_count((stat->total / -unit), 0, out, arg); + } + } + if (stat->current != 0) { + _mi_fprintf(out, arg, " "); + _mi_fprintf(out, arg, (notok == NULL ? "not all freed" : notok)); + _mi_fprintf(out, arg, "\n"); + } + else { + _mi_fprintf(out, arg, " ok\n"); + } + } + else { + mi_print_amount(stat->peak, 1, out, arg); + mi_print_amount(stat->total, 1, out, arg); + _mi_fprintf(out, arg, "%11s", " "); // no freed + mi_print_amount(stat->current, 1, out, arg); + _mi_fprintf(out, arg, "\n"); + } +} + +static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) { + mi_stat_print_ex(stat, msg, unit, out, arg, NULL); +} + +#if MI_STAT>1 +static void mi_stat_total_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) { + _mi_fprintf(out, arg, " %-10s:", msg); + _mi_fprintf(out, arg, "%12s", " "); // no peak + mi_print_amount(stat->total, unit, out, arg); + _mi_fprintf(out, arg, "\n"); +} +#endif + +static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) { + _mi_fprintf(out, arg, " %-10s:", msg); + mi_print_amount(stat->total, -1, out, arg); + _mi_fprintf(out, arg, "\n"); +} + + +static void mi_stat_average_print(size_t count, size_t total, const char* msg, mi_output_fun* out, void* arg) { + const int64_t avg_tens = (count == 0 ? 
0 : (total*10 / count)); + const long avg_whole = (long)(avg_tens/10); + const long avg_frac1 = (long)(avg_tens%10); + _mi_fprintf(out, arg, " %-10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1); +} + + +static void mi_print_header(const char* name,mi_output_fun* out, void* arg ) { + _mi_fprintf(out, arg, " %-11s %11s %11s %11s %11s %11s\n", + name, "peak ", "total ", "current ", "block ", "total# "); +} + +#if MI_STAT>1 +static bool mi_stats_print_bins(const mi_stat_count_t* bins, size_t max, mi_output_fun* out, void* arg) { + bool found = false; + char buf[64]; + for (size_t i = 0; i <= max; i++) { + if (bins[i].total > 0) { + found = true; + const size_t unit = _mi_bin_size((uint8_t)i); + const char* pagekind = (unit <= MI_SMALL_MAX_OBJ_SIZE ? "S" : + (unit <= MI_MEDIUM_MAX_OBJ_SIZE ? "M" : + (unit <= MI_LARGE_MAX_OBJ_SIZE ? "L" : "H"))); + _mi_snprintf(buf, 64, "bin%2s %3lu", pagekind, (long)i); + mi_stat_print(&bins[i], buf, (int64_t)unit, out, arg); + } + } + if (found) { + _mi_fprintf(out, arg, "\n"); + } + return found; +} +#endif + + + +//------------------------------------------------------------ +// Use an output wrapper for line-buffered output +// (which is nice when using loggers etc.) +//------------------------------------------------------------ +typedef struct buffered_s { + mi_output_fun* out; // original output function + void* arg; // and state + char* buf; // local buffer of at least size `count+1` + size_t used; // currently used chars `used <= count` + size_t count; // total chars available for output +} buffered_t; + +static void mi_buffered_flush(buffered_t* buf) { + buf->buf[buf->used] = 0; + _mi_fputs(buf->out, buf->arg, NULL, buf->buf); + buf->used = 0; +} + +static void mi_cdecl mi_buffered_out(const char* msg, void* arg) { + buffered_t* buf = (buffered_t*)arg; + if (msg==NULL || buf==NULL) return; + for (const char* src = msg; *src != 0; src++) { + char c = *src; + if (buf->used >= buf->count) mi_buffered_flush(buf); + mi_assert_internal(buf->used < buf->count); + buf->buf[buf->used++] = c; + if (c == '\n') mi_buffered_flush(buf); + } +} + +//------------------------------------------------------------ +// Print statistics +//------------------------------------------------------------ + +mi_decl_export void mi_process_info_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept +{ + size_t elapsed; + size_t user_time; + size_t sys_time; + size_t current_rss; + size_t peak_rss; + size_t current_commit; + size_t peak_commit; + size_t page_faults; + mi_process_info(&elapsed, &user_time, &sys_time, &current_rss, &peak_rss, &current_commit, &peak_commit, &page_faults); + _mi_fprintf(out, arg, " %-10s: %5zu.%03zu s\n", "elapsed", elapsed/1000, elapsed%1000); + _mi_fprintf(out, arg, " %-10s: user: %zu.%03zu s, system: %zu.%03zu s, faults: %zu, peak rss: ", "process", + user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, page_faults); + mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s"); + if (peak_commit > 0) { + _mi_fprintf(out, arg, ", peak commit: "); + mi_printf_amount((int64_t)peak_commit, 1, out, arg, "%s"); + } + _mi_fprintf(out, arg, "\n"); +} + +void _mi_stats_print(const char* name, size_t id, mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_attr_noexcept { + // wrap the output function to be line buffered + char buf[256]; _mi_memzero_var(buf); + buffered_t buffer = { out0, arg0, NULL, 0, 255 }; + buffer.buf = buf; + mi_output_fun* out = &mi_buffered_out; + void* arg = &buffer; + + // and print using that + _mi_fprintf(out, arg, "%s %zu\n", 
name, id); + + if (stats->malloc_normal.total + stats->malloc_huge.total != 0) { + #if MI_STAT>1 + mi_print_header("blocks", out, arg); + mi_stats_print_bins(stats->malloc_bins, MI_BIN_HUGE, out, arg); + #endif + #if MI_STAT + mi_stat_print(&stats->malloc_normal, "binned", (stats->malloc_normal_count.total == 0 ? 1 : -1), out, arg); + mi_stat_print(&stats->malloc_huge, "huge", (stats->malloc_huge_count.total == 0 ? 1 : -1), out, arg); + mi_stat_count_t total = { 0,0,0 }; + mi_stat_count_add_mt(&total, &stats->malloc_normal); + mi_stat_count_add_mt(&total, &stats->malloc_huge); + mi_stat_print_ex(&total, "total", 1, out, arg, ""); + #if MI_STAT>1 + mi_stat_total_print(&stats->malloc_requested, "malloc req", 1, out, arg); + #endif + _mi_fprintf(out, arg, "\n"); + #endif + } + + if (stats->pages.total != 0) { + mi_print_header("pages", out, arg); + mi_stat_print_ex(&stats->page_committed, "touched", 1, out, arg, ""); + // mi_stat_print(&stats->segments, "segments", -1, out, arg); + // mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out, arg); + // mi_stat_print(&stats->segments_cache, "-cached", -1, out, arg); + mi_stat_print(&stats->pages, "pages", -1, out, arg); + mi_stat_print(&stats->pages_abandoned, "abandoned", -1, out, arg); + mi_stat_counter_print(&stats->pages_reclaim_on_alloc, "reclaima", out, arg); + mi_stat_counter_print(&stats->pages_reclaim_on_free, "reclaimf", out, arg); + mi_stat_counter_print(&stats->pages_reabandon_full, "reabandon", out, arg); + mi_stat_counter_print(&stats->pages_unabandon_busy_wait, "waits", out, arg); + mi_stat_counter_print(&stats->pages_extended, "extended", out, arg); + mi_stat_counter_print(&stats->pages_retire, "retire", out, arg); + mi_stat_average_print(stats->page_searches_count.total, stats->page_searches.total, "searches", out, arg); + _mi_fprintf(out, arg, "\n"); + } + + if (stats->arena_count.total > 0) { + mi_print_header("arenas", out, arg); + mi_stat_print_ex(&stats->reserved, "reserved", 1, out, arg, ""); + mi_stat_print_ex(&stats->committed, "committed", 1, out, arg, ""); + mi_stat_counter_print(&stats->reset, "reset", out, arg); + mi_stat_counter_print(&stats->purged, "purged", out, arg); + + mi_stat_counter_print(&stats->arena_count, "arenas", out, arg); + mi_stat_counter_print(&stats->arena_rollback_count, "rollback", out, arg); + mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg); + mi_stat_counter_print(&stats->commit_calls, "commits", out, arg); + mi_stat_counter_print(&stats->reset_calls, "resets", out, arg); + mi_stat_counter_print(&stats->purge_calls, "purges", out, arg); + mi_stat_counter_print(&stats->malloc_guarded_count, "guarded", out, arg); + mi_stat_print_ex(&stats->heaps, "heaps", -1, out, arg, ""); + _mi_fprintf(out, arg, "\n"); + + mi_print_header("process", out, arg); + mi_stat_print_ex(&stats->threads, "threads", -1, out, arg, ""); + _mi_fprintf(out, arg, " %-10s: %5i\n", "numa nodes", _mi_os_numa_node_count()); + mi_process_info_print_out(out, arg); + } + _mi_fprintf(out, arg, "\n"); +} + + +static mi_msecs_t mi_process_start; // = 0 + +// called on process init +void _mi_stats_init(void) { + if (mi_process_start == 0) { mi_process_start = _mi_clock_start(); }; +} + +static void mi_stats_add_into(mi_stats_t* to, mi_stats_t* from) { + mi_assert_internal(to != NULL && from != NULL); + if (to == from) return; + mi_stats_add(to, from); +} + +void _mi_stats_merge_into(mi_stats_t* to, mi_stats_t* from) { + mi_assert_internal(to != NULL && from != NULL); + if (to == from) return; + mi_stats_add(to, 
from); + _mi_memzero(from, sizeof(mi_stats_t)); +} + +static mi_stats_t* mi_stats_merge_theap_to_heap(mi_theap_t* theap) mi_attr_noexcept { + mi_stats_t* stats = &theap->stats; + mi_stats_t* heap_stats = &theap->heap->stats; + _mi_stats_merge_into( heap_stats, stats ); + return heap_stats; +} + +static mi_stats_t* mi_heap_get_stats(mi_heap_t* heap) { + if (heap==NULL) { heap = mi_heap_main(); } + mi_theap_t* theap = _mi_heap_theap_peek(heap); + if (theap==NULL) return &heap->stats; + else return mi_stats_merge_theap_to_heap(theap); +} + +// deprecated +void mi_stats_reset(void) mi_attr_noexcept { + if (!mi_theap_is_initialized(_mi_theap_default())) return; + mi_heap_get_stats(mi_heap_main()); + mi_heap_stats_merge_to_subproc(mi_heap_main()); +} + + +void mi_heap_stats_print_out(mi_heap_t* heap, mi_output_fun* out, void* arg) mi_attr_noexcept { + if (heap==NULL) { heap = mi_heap_main(); } + _mi_stats_print("heap", heap->heap_seq, mi_heap_get_stats(heap), out, arg); +} + +typedef struct mi_heap_print_visit_info_s { + mi_output_fun* out; + void* out_arg; +} mi_heap_print_visit_info_t; + +static bool mi_cdecl mi_heap_print_visitor(mi_heap_t* heap, void* arg) { + mi_heap_print_visit_info_t* vinfo = (mi_heap_print_visit_info_t*)(arg); + mi_heap_stats_print_out(heap, vinfo->out, vinfo->out_arg); + return true; +} + + +// show each heap and then the subproc +void mi_subproc_heap_stats_print_out(mi_subproc_id_t subproc_id, mi_output_fun* out, void* arg) mi_attr_noexcept { + mi_subproc_t* subproc = _mi_subproc_from_id(subproc_id); + if (subproc==NULL) return; + mi_heap_print_visit_info_t vinfo = { out, arg }; + mi_subproc_visit_heaps(subproc, &mi_heap_print_visitor, &vinfo); + _mi_stats_print("subproc", subproc->subproc_seq, &subproc->stats, out, arg); +} + + +// aggregate all stats from the heaps and subproc and print those +void mi_subproc_stats_print_out(mi_subproc_id_t subproc_id, mi_output_fun* out, void* arg) mi_attr_noexcept { + mi_subproc_t* subproc = _mi_subproc_from_id(subproc_id); + if (subproc==NULL) return; + mi_stats_t stats; + mi_stats_get(sizeof(stats), &stats); + _mi_stats_print("subproc", subproc->subproc_seq, &stats, out, arg); +} + +void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { + mi_subproc_stats_print_out(mi_subproc_current(),out, arg); +} + +// deprecated +void mi_stats_print(void* out) mi_attr_noexcept { + // for compatibility there is an `out` parameter (which can be `stdout` or `stderr`) + mi_stats_print_out((mi_output_fun*)out, NULL); +} + +// deprecated +void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { + mi_theap_t* theap = _mi_theap_default(); + if (theap==NULL || !mi_theap_is_initialized(theap)) return; + _mi_stats_print("heap", theap->heap->heap_seq, &theap->stats, out, arg); + mi_stats_merge_theap_to_heap(_mi_theap_default()); +} + + +// ---------------------------------------------------------------- +// Basic timer for convenience; use milli-seconds to avoid doubles +// ---------------------------------------------------------------- + +static mi_msecs_t mi_clock_diff; + +mi_msecs_t _mi_clock_now(void) { + return _mi_prim_clock_now(); +} + +mi_msecs_t _mi_clock_start(void) { + if (mi_clock_diff == 0.0) { + mi_msecs_t t0 = _mi_clock_now(); + mi_clock_diff = _mi_clock_now() - t0; + } + return _mi_clock_now(); +} + +mi_msecs_t _mi_clock_end(mi_msecs_t start) { + mi_msecs_t end = _mi_clock_now(); + return (end - start - mi_clock_diff); +} + + +// -------------------------------------------------------- +// Basic 
process statistics +// -------------------------------------------------------- + +mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept +{ + mi_heap_t* const heap = mi_heap_main(); + mi_process_info_t pinfo; + _mi_memzero_var(pinfo); + pinfo.elapsed = _mi_clock_end(mi_process_start); + pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)(&heap->stats.committed.current))); + pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)(&heap->stats.committed.peak))); + pinfo.current_rss = pinfo.current_commit; + pinfo.peak_rss = pinfo.peak_commit; + pinfo.utime = 0; + pinfo.stime = 0; + pinfo.page_faults = 0; + + _mi_prim_process_info(&pinfo); + + if (elapsed_msecs!=NULL) *elapsed_msecs = (pinfo.elapsed < 0 ? 0 : (pinfo.elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.elapsed : PTRDIFF_MAX)); + if (user_msecs!=NULL) *user_msecs = (pinfo.utime < 0 ? 0 : (pinfo.utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.utime : PTRDIFF_MAX)); + if (system_msecs!=NULL) *system_msecs = (pinfo.stime < 0 ? 0 : (pinfo.stime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.stime : PTRDIFF_MAX)); + if (current_rss!=NULL) *current_rss = pinfo.current_rss; + if (peak_rss!=NULL) *peak_rss = pinfo.peak_rss; + if (current_commit!=NULL) *current_commit = pinfo.current_commit; + if (peak_commit!=NULL) *peak_commit = pinfo.peak_commit; + if (page_faults!=NULL) *page_faults = pinfo.page_faults; +} + +mi_decl_export void mi_process_info_print(void) mi_attr_noexcept { + mi_process_info_print_out(NULL, NULL); +} + + +// -------------------------------------------------------- +// Return statistics +// -------------------------------------------------------- + +size_t mi_stats_get_bin_size(size_t bin) mi_attr_noexcept { + if (bin > MI_BIN_HUGE) return 0; + return _mi_bin_size(bin); +} + +static void _mi_stats_get(mi_stats_t* stats_in, size_t stats_size, mi_stats_t* stats_out) mi_attr_noexcept { + if (stats_out == NULL || stats_size == 0) return; + _mi_memzero(stats_out, stats_size); + if (stats_in == NULL) return; + const size_t size = (stats_size > sizeof(mi_stats_t) ? 
sizeof(mi_stats_t) : stats_size); + _mi_memcpy(stats_out, stats_in, size); + stats_out->version = MI_STAT_VERSION; +} + +void mi_subproc_stats_get(mi_subproc_id_t subproc_id, size_t stats_size, mi_stats_t* stats) mi_attr_noexcept { + _mi_stats_get(&_mi_subproc_from_id(subproc_id)->stats, stats_size, stats); +} + +void mi_heap_stats_get(mi_heap_t* heap, size_t stats_size, mi_stats_t* stats) mi_attr_noexcept { + _mi_stats_get(mi_heap_get_stats(heap), stats_size, stats); +} + + +static bool mi_cdecl mi_heap_aggregate_visitor(mi_heap_t* heap, void* arg) { + mi_stats_t* stats = (mi_stats_t*)arg; + mi_stats_add_into(stats, &heap->stats); + return true; +} + +static void mi_subproc_aggregate_stats(mi_subproc_id_t subproc_id, size_t stats_size, mi_stats_t* stats) { + mi_subproc_t* subproc = _mi_subproc_from_id(subproc_id); + if (stats==NULL || stats_size==0) return; + _mi_memzero(stats, stats_size); + if (stats_size < sizeof(mi_stats_t)) return; + mi_subproc_visit_heaps(subproc, &mi_heap_aggregate_visitor, stats); + mi_stats_add_into(stats, &subproc->stats); +} + +void mi_stats_get(size_t stats_size, mi_stats_t* stats) mi_attr_noexcept { + mi_subproc_aggregate_stats(mi_subproc_current(), stats_size, stats); +} + + +// -------------------------------------------------------- +// Statics in json format +// -------------------------------------------------------- + +typedef struct mi_json_buf_s { + char* buf; + size_t size; + size_t used; + bool can_realloc; +} mi_json_buf_t; + +static bool mi_json_buf_expand(mi_json_buf_t* hbuf) { + if (hbuf==NULL) return false; + if (hbuf->buf != NULL && hbuf->size>0) { + hbuf->buf[hbuf->size-1] = 0; + } + if (hbuf->size > SIZE_MAX/2 || !hbuf->can_realloc) return false; + const size_t newsize = (hbuf->size == 0 ? mi_good_size(12*MI_KiB) : 2*hbuf->size); + char* const newbuf = (char*)mi_rezalloc(hbuf->buf, newsize); + if (newbuf == NULL) return false; + hbuf->buf = newbuf; + hbuf->size = newsize; + return true; +} + +static void mi_json_buf_print(mi_json_buf_t* hbuf, const char* msg) { + if (msg==NULL || hbuf==NULL) return; + if (hbuf->used + 1 >= hbuf->size && !hbuf->can_realloc) return; + for (const char* src = msg; *src != 0; src++) { + char c = *src; + if (hbuf->used + 1 >= hbuf->size) { + if (!mi_json_buf_expand(hbuf)) return; + } + mi_assert_internal(hbuf->used < hbuf->size); + hbuf->buf[hbuf->used++] = c; + } + mi_assert_internal(hbuf->used < hbuf->size); + hbuf->buf[hbuf->used] = 0; +} + +static void mi_json_buf_print_count_bin(mi_json_buf_t* hbuf, const char* prefix, mi_stat_count_t* stat, size_t bin, bool add_comma) { + const size_t binsize = mi_stats_get_bin_size(bin); + const size_t pagesize = (binsize <= MI_SMALL_MAX_OBJ_SIZE ? MI_SMALL_PAGE_SIZE : + (binsize <= MI_MEDIUM_MAX_OBJ_SIZE ? MI_MEDIUM_PAGE_SIZE : + (binsize <= MI_LARGE_MAX_OBJ_SIZE ? MI_LARGE_PAGE_SIZE : 0))); + char buf[128]; + _mi_snprintf(buf, 128, "%s{ \"total\": %lld, \"peak\": %lld, \"current\": %lld, \"block_size\": %zu, \"page_size\": %zu }%s\n", prefix, stat->total, stat->peak, stat->current, binsize, pagesize, (add_comma ? 
"," : "")); + buf[127] = 0; + mi_json_buf_print(hbuf, buf); +} + +static void mi_json_buf_print_count_cbin(mi_json_buf_t* hbuf, const char* prefix, mi_stat_count_t* stat, mi_chunkbin_t bin, bool add_comma) { + const char* cbin = " "; + switch(bin) { + case MI_CBIN_SMALL: cbin = "S"; break; + case MI_CBIN_MEDIUM: cbin = "M"; break; + case MI_CBIN_LARGE: cbin = "L"; break; + case MI_CBIN_HUGE: cbin = "H"; break; + case MI_CBIN_OTHER: cbin = "X"; break; + default: cbin = " "; break; + } + char buf[128]; + _mi_snprintf(buf, 128, "%s{ \"total\": %lld, \"peak\": %lld, \"current\": %lld, \"bin\": \"%s\" }%s\n", prefix, stat->total, stat->peak, stat->current, cbin, (add_comma ? "," : "")); + buf[127] = 0; + mi_json_buf_print(hbuf, buf); +} + +static void mi_json_buf_print_count(mi_json_buf_t* hbuf, const char* prefix, mi_stat_count_t* stat, bool add_comma) { + char buf[128]; + _mi_snprintf(buf, 128, "%s{ \"total\": %lld, \"peak\": %lld, \"current\": %lld }%s\n", prefix, stat->total, stat->peak, stat->current, (add_comma ? "," : "")); + buf[127] = 0; + mi_json_buf_print(hbuf, buf); +} + +static void mi_json_buf_print_count_value(mi_json_buf_t* hbuf, const char* name, mi_stat_count_t* stat) { + char buf[128]; + _mi_snprintf(buf, 128, " \"%s\": ", name); + buf[127] = 0; + mi_json_buf_print(hbuf, buf); + mi_json_buf_print_count(hbuf, "", stat, true); +} + +static void mi_json_buf_print_value(mi_json_buf_t* hbuf, const char* name, int64_t val) { + char buf[128]; + _mi_snprintf(buf, 128, " \"%s\": %lld,\n", name, val); + buf[127] = 0; + mi_json_buf_print(hbuf, buf); +} + +static void mi_json_buf_print_size(mi_json_buf_t* hbuf, const char* name, size_t val, bool add_comma) { + char buf[128]; + _mi_snprintf(buf, 128, " \"%s\": %zu%s\n", name, val, (add_comma ? "," : "")); + buf[127] = 0; + mi_json_buf_print(hbuf, buf); +} + +static void mi_json_buf_print_counter_value(mi_json_buf_t* hbuf, const char* name, mi_stat_counter_t* stat) { + mi_json_buf_print_value(hbuf, name, stat->total); +} + +#define MI_STAT_COUNT(stat) mi_json_buf_print_count_value(&hbuf, #stat, &stats->stat); +#define MI_STAT_COUNTER(stat) mi_json_buf_print_counter_value(&hbuf, #stat, &stats->stat); + +static char* mi_stats_get_json_from(mi_stats_t* stats, size_t output_size, char* output_buf) mi_attr_noexcept { + mi_json_buf_t hbuf = { NULL, 0, 0, true }; + if (output_size > 0 && output_buf != NULL) { + _mi_memzero(output_buf, output_size); + hbuf.buf = output_buf; + hbuf.size = output_size; + hbuf.can_realloc = false; + } + else { + if (!mi_json_buf_expand(&hbuf)) return NULL; + } + mi_json_buf_print(&hbuf, "{\n"); + mi_json_buf_print_value(&hbuf, "version", MI_STAT_VERSION); + mi_json_buf_print_value(&hbuf, "mimalloc_version", MI_MALLOC_VERSION); + + // process info + mi_json_buf_print(&hbuf, " \"process\": {\n"); + size_t elapsed; + size_t user_time; + size_t sys_time; + size_t current_rss; + size_t peak_rss; + size_t current_commit; + size_t peak_commit; + size_t page_faults; + mi_process_info(&elapsed, &user_time, &sys_time, &current_rss, &peak_rss, &current_commit, &peak_commit, &page_faults); + mi_json_buf_print_size(&hbuf, "elapsed_msecs", elapsed, true); + mi_json_buf_print_size(&hbuf, "user_msecs", user_time, true); + mi_json_buf_print_size(&hbuf, "system_msecs", sys_time, true); + mi_json_buf_print_size(&hbuf, "page_faults", page_faults, true); + mi_json_buf_print_size(&hbuf, "rss_current", current_rss, true); + mi_json_buf_print_size(&hbuf, "rss_peak", peak_rss, true); + mi_json_buf_print_size(&hbuf, "commit_current", current_commit, 
true); + mi_json_buf_print_size(&hbuf, "commit_peak", peak_commit, false); + mi_json_buf_print(&hbuf, " },\n"); + + // statistics + MI_STAT_FIELDS() + + // size bins + mi_json_buf_print(&hbuf, " \"malloc_bins\": [\n"); + for (size_t i = 0; i <= MI_BIN_HUGE; i++) { + mi_json_buf_print_count_bin(&hbuf, " ", &stats->malloc_bins[i], i, i!=MI_BIN_HUGE); + } + mi_json_buf_print(&hbuf, " ],\n"); + mi_json_buf_print(&hbuf, " \"page_bins\": [\n"); + for (size_t i = 0; i <= MI_BIN_HUGE; i++) { + mi_json_buf_print_count_bin(&hbuf, " ", &stats->page_bins[i], i, i!=MI_BIN_HUGE); + } + mi_json_buf_print(&hbuf, " ],\n"); + mi_json_buf_print(&hbuf, " \"chunk_bins\": [\n"); + for (size_t i = 0; i < MI_CBIN_COUNT; i++) { + mi_json_buf_print_count_cbin(&hbuf, " ", &stats->chunk_bins[i], (mi_chunkbin_t)i, i!=MI_CBIN_COUNT-1); + } + mi_json_buf_print(&hbuf, " ]\n"); + mi_json_buf_print(&hbuf, "}\n"); + return hbuf.buf; +} + +char* mi_subproc_stats_get_json(mi_subproc_id_t subproc_id, size_t buf_size, char* buf) mi_attr_noexcept { + return mi_stats_get_json_from(&_mi_subproc_from_id(subproc_id)->stats, buf_size, buf); +} + +char* mi_heap_stats_get_json(mi_heap_t* heap, size_t buf_size, char* buf) mi_attr_noexcept { + return mi_stats_get_json_from(mi_heap_get_stats(heap), buf_size, buf); +} + +char* mi_stats_get_json(size_t buf_size, char* buf) mi_attr_noexcept { + mi_stats_t stats; + mi_stats_get(sizeof(stats), &stats); + return mi_stats_get_json_from(&stats, buf_size, buf); +} + diff --git a/3rd/mimalloc-3.2.6/src/theap.c b/3rd/mimalloc-3.2.6/src/theap.c new file mode 100644 index 00000000..d4d89a3d --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/theap.c @@ -0,0 +1,674 @@ +/*---------------------------------------------------------------------------- +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" // _mi_theap_default + +#if defined(_MSC_VER) && (_MSC_VER < 1920) +#pragma warning(disable:4204) // non-constant aggregate initializer +#endif + +/* ----------------------------------------------------------- + Helpers +----------------------------------------------------------- */ + +// return `true` if ok, `false` to break +typedef bool (theap_page_visitor_fun)(mi_theap_t* theap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2); + +// Visit all pages in a theap; returns `false` if break was called. 
+static bool mi_theap_visit_pages(mi_theap_t* theap, theap_page_visitor_fun* fn, void* arg1, void* arg2) +{ + if (theap==NULL || theap->page_count==0) return 0; + + // visit all pages + #if MI_DEBUG>1 + size_t total = theap->page_count; + size_t count = 0; + #endif + + for (size_t i = 0; i <= MI_BIN_FULL; i++) { + mi_page_queue_t* pq = &theap->pages[i]; + mi_page_t* page = pq->first; + while(page != NULL) { + mi_page_t* next = page->next; // save next in case the page gets removed from the queue + mi_assert_internal(mi_page_theap(page) == theap); + #if MI_DEBUG>1 + count++; + #endif + if (!fn(theap, pq, page, arg1, arg2)) return false; + page = next; // and continue + } + } + mi_assert_internal(count == total); + return true; +} + + +#if MI_DEBUG>=2 +static bool mi_theap_page_is_valid(mi_theap_t* theap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { + MI_UNUSED(arg1); + MI_UNUSED(arg2); + MI_UNUSED(pq); + mi_assert_internal(mi_page_theap(page) == theap); + mi_assert_expensive(_mi_page_is_valid(page)); + return true; +} +#endif +#if MI_DEBUG>=3 +static bool mi_theap_is_valid(mi_theap_t* theap) { + mi_assert_internal(theap!=NULL); + mi_theap_visit_pages(theap, &mi_theap_page_is_valid, NULL, NULL); + for (size_t bin = 0; bin < MI_BIN_COUNT; bin++) { + mi_assert_internal(_mi_page_queue_is_valid(theap, &theap->pages[bin])); + } + return true; +} +#endif + + + + +/* ----------------------------------------------------------- + "Collect" pages by migrating `local_free` and `thread_free` + lists and freeing empty pages. This is done when a thread + stops (and in that case abandons pages if there are still + blocks alive) +----------------------------------------------------------- */ + +typedef enum mi_collect_e { + MI_NORMAL, + MI_FORCE, + MI_ABANDON +} mi_collect_t; + + +static bool mi_theap_page_collect(mi_theap_t* theap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) { + MI_UNUSED(arg2); + MI_UNUSED(theap); + mi_assert_internal(mi_theap_page_is_valid(theap, pq, page, NULL, NULL)); + mi_collect_t collect = *((mi_collect_t*)arg_collect); + _mi_page_free_collect(page, collect >= MI_FORCE); + if (mi_page_all_free(page)) { + // no more used blocks, free the page. + // note: this will free retired pages as well. 
+ _mi_page_free(page, pq); + } + else if (collect == MI_ABANDON) { + // still used blocks but the thread is done; abandon the page + _mi_page_abandon(page, pq); + } + return true; // don't break +} + +static void mi_theap_merge_stats(mi_theap_t* theap) { + mi_assert_internal(mi_theap_is_initialized(theap)); + _mi_stats_merge_into(&theap->heap->stats, &theap->stats); +} + +static void mi_theap_collect_ex(mi_theap_t* theap, mi_collect_t collect) +{ + if (theap==NULL || !mi_theap_is_initialized(theap)) return; + mi_assert_expensive(mi_theap_is_valid(theap)); + + const bool force = (collect >= MI_FORCE); + _mi_deferred_free(theap, force); + + // python/cpython#112532: we may be called from a thread that is not the owner of the theap + // const bool is_main_thread = (_mi_is_main_thread() && theap->thread_id == _mi_thread_id()); + + // if (_mi_is_main_thread()) { mi_debug_show_arenas(true, false, false); } + + // collect retired pages + _mi_theap_collect_retired(theap, force); + + // collect all pages owned by this thread + mi_theap_visit_pages(theap, &mi_theap_page_collect, &collect, NULL); + + // collect arenas (this is program wide so don't force purges on abandonment of threads) + //mi_atomic_storei64_release(&theap->tld->subproc->purge_expire, 1); + _mi_arenas_collect(collect == MI_FORCE /* force purge? */, collect >= MI_FORCE /* visit all? */, theap->tld); + + // merge statistics + mi_theap_merge_stats(theap); +} + +void _mi_theap_collect_abandon(mi_theap_t* theap) { + mi_theap_collect_ex(theap, MI_ABANDON); +} + +void mi_theap_collect(mi_theap_t* theap, bool force) mi_attr_noexcept { + mi_theap_collect_ex(theap, (force ? MI_FORCE : MI_NORMAL)); +} + +void mi_collect(bool force) mi_attr_noexcept { + // cannot really collect process wide, just a theap.. + mi_theap_collect(_mi_theap_default(), force); +} + +void mi_heap_collect(mi_heap_t* heap, bool force) { + // cannot really collect a heap, just a theap.. + mi_theap_collect(mi_heap_theap(heap), force); +} + +/* ----------------------------------------------------------- + Heap new +----------------------------------------------------------- */ + +mi_theap_t* mi_theap_get_default(void) { + mi_theap_t* theap = _mi_theap_default(); + if mi_unlikely(!mi_theap_is_initialized(theap)) { + mi_thread_init(); + theap = _mi_theap_default(); + mi_assert_internal(mi_theap_is_initialized(theap)); + } + return theap; +} + +// todo: make order of parameters consistent (but would that break compat with CPython?) +void _mi_theap_init(mi_theap_t* theap, mi_heap_t* heap, mi_tld_t* tld) +{ + mi_assert_internal(theap!=NULL); + mi_memid_t memid = theap->memid; + _mi_memcpy_aligned(theap, &_mi_theap_empty, sizeof(mi_theap_t)); + theap->memid = memid; + theap->heap = heap; + theap->tld = tld; // avoid reading the thread-local tld during initialization + + _mi_theap_options_init(theap); + if (theap->tld->is_in_threadpool) { + // if we run as part of a thread pool it is better to not arbitrarily reclaim abandoned pages into our theap. + // this is checked in `free.c:mi_free_try_collect_mt` + // .. 
but abandoning is good in this case: halve the full page retain (possibly to 0) + // (so blocked threads do not hold on to too much memory) + if (theap->page_full_retain > 0) { + theap->page_full_retain = theap->page_full_retain / 4; + } + } + + // push on the thread local theaps list + mi_theap_t* head = theap->tld->theaps; + theap->tprev = NULL; + theap->tnext = head; + if (head!=NULL) { head->tprev = theap; } + theap->tld->theaps = theap; + + // initialize random + if (head == NULL) { // first theap in this thread? + #if defined(_WIN32) && !defined(MI_SHARED_LIB) + _mi_random_init_weak(&theap->random); // prevent allocation failure during bcrypt dll initialization with static linking (issue #1185) + #else + _mi_random_init(&theap->random); + #endif + } + else { + _mi_random_split(&head->random, &theap->random); + } + theap->cookie = _mi_theap_random_next(theap) | 1; + _mi_theap_guarded_init(theap); + + // push on the heap's theap list + mi_lock(&heap->theaps_lock) { + head = heap->theaps; + theap->hprev = NULL; + theap->hnext = head; + if (head!=NULL) { head->hprev = theap; } + heap->theaps = theap; + } +} + +mi_theap_t* _mi_theap_create(mi_heap_t* heap, mi_tld_t* tld) { + mi_assert_internal(tld!=NULL); + mi_assert_internal(heap!=NULL); + // allocate and initialize a theap + mi_memid_t memid; + mi_theap_t* theap; + //if (!_mi_is_heap_main(heap)) { + // theap = (mi_theap_t*)mi_heap_zalloc(mi_heap_main(),sizeof(mi_theap_t)); + // memid = _mi_memid_create(MI_MEM_HEAP_MAIN); + // memid.initially_zero = memid.initially_committed = true; + //} + //else + if (heap->exclusive_arena == NULL) { + theap = (mi_theap_t*)_mi_meta_zalloc(sizeof(mi_theap_t), &memid); + } + else { + // theaps associated with a specific arena are allocated in that arena + // note: takes up at least one slice which is quite wasteful... + const size_t size = _mi_align_up(sizeof(mi_theap_t),MI_ARENA_MIN_OBJ_SIZE); + theap = (mi_theap_t*)_mi_arenas_alloc(heap, size, true, true, heap->exclusive_arena, tld->thread_seq, tld->numa_node, &memid); + mi_assert_internal(memid.mem.os.size >= size); + } + if (theap==NULL) { + _mi_error_message(ENOMEM, "unable to allocate theap meta-data\n"); + return NULL; + } + theap->memid = memid; + _mi_theap_init(theap, heap, tld); + return theap; +} + +uintptr_t _mi_theap_random_next(mi_theap_t* theap) { + return _mi_random_next(&theap->random); +} + +// called from `mi_theap_delete` to free the internal theap resources. 
+void _mi_theap_free(mi_theap_t* theap) { + mi_assert(theap != NULL); + mi_assert_internal(mi_theap_is_initialized(theap)); + if (theap==NULL || !mi_theap_is_initialized(theap)) return; + + // merge stats to the owning heap + mi_theap_merge_stats(theap); + + // remove ourselves from the heap theaps list + mi_lock(&theap->heap->theaps_lock) { + if (theap->hnext != NULL) { theap->hnext->hprev = theap->hprev; } + if (theap->hprev != NULL) { theap->hprev->hnext = theap->hnext; } + else { mi_assert_internal(theap->heap->theaps == theap); theap->heap->theaps = theap->hnext; } + } + + // remove ourselves from the thread local theaps list + if (theap->tnext != NULL) { theap->tnext->tprev = theap->tprev; } + if (theap->tprev != NULL) { theap->tprev->tnext = theap->tnext; } + else { mi_assert_internal(theap->tld->theaps == theap); theap->tld->theaps = theap->tnext; } + + // and free the used memory + if (theap->memid.memkind == MI_MEM_HEAP_MAIN) { // note: for now unused as it would access theap_default stats in mi_free of the current theap + mi_assert_internal(_mi_is_heap_main(mi_heap_of(theap))); + mi_free(theap); + } + else if (theap->memid.memkind == MI_MEM_META) { + _mi_meta_free(theap, sizeof(*theap), theap->memid); + } + else { + _mi_arenas_free(theap, _mi_align_up(sizeof(*theap),MI_ARENA_MIN_OBJ_SIZE), theap->memid ); // issue #1168, avoid assertion failure + } +} + + +/* ----------------------------------------------------------- + Heap destroy +----------------------------------------------------------- */ +/* + +// zero out the page queues +static void mi_theap_reset_pages(mi_theap_t* theap) { + mi_assert_internal(theap != NULL); + mi_assert_internal(mi_theap_is_initialized(theap)); + // TODO: copy full empty theap instead? + _mi_memset(&theap->pages_free_direct, 0, sizeof(theap->pages_free_direct)); + _mi_memcpy_aligned(&theap->pages, &_mi_theap_empty.pages, sizeof(theap->pages)); + // theap->thread_delayed_free = NULL; + theap->page_count = 0; +} + +static bool _mi_theap_page_destroy(mi_theap_t* theap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { + MI_UNUSED(arg1); + MI_UNUSED(arg2); + MI_UNUSED(pq); + + // ensure no more thread_delayed_free will be added + //_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); + + // stats + const size_t bsize = mi_page_block_size(page); + if (bsize > MI_LARGE_MAX_OBJ_SIZE) { + mi_theap_stat_decrease(theap, malloc_huge, bsize); + } + #if (MI_STAT>0) + _mi_page_free_collect(page, false); // update used count + const size_t inuse = page->used; + if (bsize <= MI_LARGE_MAX_OBJ_SIZE) { + mi_theap_stat_decrease(theap, malloc_normal, bsize * inuse); + #if (MI_STAT>1) + mi_theap_stat_decrease(theap, malloc_bins[_mi_bin(bsize)], inuse); + #endif + } + // mi_theap_stat_decrease(theap, malloc_requested, bsize * inuse); // todo: off for aligned blocks... 
+ #endif + + /// pretend it is all free now + mi_assert_internal(mi_page_thread_free(page) == NULL); + page->used = 0; + + // and free the page + // mi_page_free(page,false); + page->next = NULL; + page->prev = NULL; + mi_page_set_theap(page, NULL); + _mi_arenas_page_free(page, theap); + + return true; // keep going +} + +void _mi_theap_destroy_pages(mi_theap_t* theap) { + mi_theap_visit_pages(theap, &_mi_theap_page_destroy, NULL, NULL); + mi_theap_reset_pages(theap); +} + +#if MI_TRACK_HEAP_DESTROY +static bool mi_cdecl mi_theap_track_block_free(const mi_theap_t* theap, const mi_theap_area_t* area, void* block, size_t block_size, void* arg) { + MI_UNUSED(theap); MI_UNUSED(area); MI_UNUSED(arg); MI_UNUSED(block_size); + mi_track_free_size(block,mi_usable_size(block)); + return true; +} +#endif + +void mi_theap_destroy(mi_theap_t* theap) { + mi_assert(theap != NULL); + mi_assert(mi_theap_is_initialized(theap)); + mi_assert(!theap->allow_page_reclaim); + mi_assert(!theap->allow_page_abandon); + mi_assert_expensive(mi_theap_is_valid(theap)); + if (theap==NULL || !mi_theap_is_initialized(theap)) return; + #if MI_GUARDED + // _mi_warning_message("'mi_theap_destroy' called but MI_GUARDED is enabled -- using `mi_theap_delete` instead (theap at %p)\n", theap); + mi_theap_delete(theap); + return; + #else + if (theap->allow_page_reclaim) { + _mi_warning_message("'mi_theap_destroy' called but ignored as the theap was not created with 'allow_destroy' (theap at %p)\n", theap); + // don't free in case it may contain reclaimed pages, + mi_theap_delete(theap); + } + else { + // track all blocks as freed + #if MI_TRACK_HEAP_DESTROY + mi_theap_visit_blocks(theap, true, mi_theap_track_block_free, NULL); + #endif + // free all pages + _mi_theap_destroy_pages(theap); + mi_theap_free(theap,true); + } + #endif +} + +// forcefully destroy all theaps in the current thread +void _mi_theap_unsafe_destroy_all(mi_theap_t* theap) { + mi_assert_internal(theap != NULL); + if (theap == NULL) return; + mi_theap_t* curr = theap->tld->theaps; + while (curr != NULL) { + mi_theap_t* next = curr->next; + if (!curr->allow_page_reclaim) { + mi_theap_destroy(curr); + } + else { + _mi_theap_destroy_pages(curr); + } + curr = next; + } +} +*/ + +/* ----------------------------------------------------------- + Safe Heap delete +----------------------------------------------------------- */ + +// Safe delete a theap without freeing any still allocated blocks in that theap. 
+void _mi_theap_delete(mi_theap_t* theap) +{ + mi_assert(theap != NULL); + mi_assert(mi_theap_is_initialized(theap)); + mi_assert_expensive(mi_theap_is_valid(theap)); + if (theap==NULL || !mi_theap_is_initialized(theap)) return; + + // abandon all pages + _mi_theap_collect_abandon(theap); + + mi_assert_internal(theap->page_count==0); + _mi_theap_free(theap); +} + + + +/* ----------------------------------------------------------- + Load/unload theaps +----------------------------------------------------------- */ +/* +void mi_theap_unload(mi_theap_t* theap) { + mi_assert(mi_theap_is_initialized(theap)); + mi_assert_expensive(mi_theap_is_valid(theap)); + if (theap==NULL || !mi_theap_is_initialized(theap)) return; + if (theap->heap->exclusive_arena == NULL) { + _mi_warning_message("cannot unload theaps that are not associated with an exclusive arena\n"); + return; + } + + // abandon all pages so all thread'id in the pages are cleared + _mi_theap_collect_abandon(theap); + mi_assert_internal(theap->page_count==0); + + // remove from theap list + mi_theap_free(theap, false); // but don't actually free the memory + + // disassociate from the current thread-local and static state + theap->tld = NULL; + return; +} + +bool mi_theap_reload(mi_theap_t* theap, mi_arena_id_t arena_id) { + mi_assert(mi_theap_is_initialized(theap)); + if (theap==NULL || !mi_theap_is_initialized(theap)) return false; + if (theap->heap->exclusive_arena == NULL) { + _mi_warning_message("cannot reload theaps that were not associated with an exclusive arena\n"); + return false; + } + if (theap->tld != NULL) { + _mi_warning_message("cannot reload theaps that were not unloaded first\n"); + return false; + } + mi_arena_t* arena = _mi_arena_from_id(arena_id); + if (theap->heap->exclusive_arena != arena) { + _mi_warning_message("trying to reload a theap at a different arena address: %p vs %p\n", theap->heap->exclusive_arena, arena); + return false; + } + + mi_assert_internal(theap->page_count==0); + + // re-associate with the current thread-local and static state + theap->tld = mi_theap_get_default()->tld; + + // reinit direct pages (as we may be in a different process) + mi_assert_internal(theap->page_count == 0); + for (size_t i = 0; i < MI_PAGES_DIRECT; i++) { + theap->pages_free_direct[i] = (mi_page_t*)&_mi_page_empty; + } + + // push on the thread local theaps list + theap->tnext = theap->tld->theaps; + theap->tld->theaps = theap; + return true; +} +*/ + + +/* ----------------------------------------------------------- + Visit all theap blocks and areas + Todo: enable visiting abandoned pages, and + enable visiting all blocks of all theaps across threads +----------------------------------------------------------- */ + +void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page) { + const size_t bsize = mi_page_block_size(page); + const size_t ubsize = mi_page_usable_block_size(page); + area->reserved = page->reserved * bsize; + area->committed = page->capacity * bsize; + area->blocks = mi_page_start(page); + area->used = page->used; // number of blocks in use (#553) + area->block_size = ubsize; + area->full_block_size = bsize; + area->reserved1 = page; +} + +static void mi_get_fast_divisor(size_t divisor, uint64_t* magic, size_t* shift) { + mi_assert_internal(divisor > 0 && divisor <= UINT32_MAX); + *shift = MI_SIZE_BITS - mi_clz(divisor - 1); + *magic = ((((uint64_t)1 << 32) * (((uint64_t)1 << *shift) - divisor)) / divisor + 1); +} + +static size_t mi_fast_divide(size_t n, uint64_t magic, size_t shift) { + 
mi_assert_internal(n <= UINT32_MAX); + const uint64_t hi = ((uint64_t)n * magic) >> 32; + return (size_t)((hi + n) >> shift); +} + +bool _mi_theap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_block_visit_fun* visitor, void* arg) { + mi_assert(area != NULL); + if (area==NULL) return true; + mi_assert(page != NULL); + if (page == NULL) return true; + + _mi_page_free_collect(page,true); // collect both thread_delayed and local_free + mi_assert_internal(page->local_free == NULL); + if (page->used == 0) return true; + + size_t psize; + uint8_t* const pstart = mi_page_area(page, &psize); + mi_heap_t* const heap = mi_page_heap(page); + const size_t bsize = mi_page_block_size(page); + const size_t ubsize = mi_page_usable_block_size(page); // without padding + + // optimize page with one block + if (page->capacity == 1) { + mi_assert_internal(page->used == 1 && page->free == NULL); + return visitor(heap, area, pstart, ubsize, arg); + } + mi_assert(bsize <= UINT32_MAX); + + // optimize full pages + if (page->used == page->capacity) { + uint8_t* block = pstart; + for (size_t i = 0; i < page->capacity; i++) { + if (!visitor(heap, area, block, ubsize, arg)) return false; + block += bsize; + } + return true; + } + + // create a bitmap of free blocks. + #define MI_MAX_BLOCKS (MI_SMALL_PAGE_SIZE / sizeof(void*)) + uintptr_t free_map[MI_MAX_BLOCKS / MI_INTPTR_BITS]; + const uintptr_t bmapsize = _mi_divide_up(page->capacity, MI_INTPTR_BITS); + memset(free_map, 0, bmapsize * sizeof(intptr_t)); + if (page->capacity % MI_INTPTR_BITS != 0) { + // mark left-over bits at the end as free + size_t shift = (page->capacity % MI_INTPTR_BITS); + uintptr_t mask = (UINTPTR_MAX << shift); + free_map[bmapsize - 1] = mask; + } + + // fast repeated division by the block size + uint64_t magic; + size_t shift; + mi_get_fast_divisor(bsize, &magic, &shift); + + #if MI_DEBUG>1 + size_t free_count = 0; + #endif + for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page, block)) { + #if MI_DEBUG>1 + free_count++; + #endif + mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize)); + size_t offset = (uint8_t*)block - pstart; + mi_assert_internal(offset % bsize == 0); + mi_assert_internal(offset <= UINT32_MAX); + size_t blockidx = mi_fast_divide(offset, magic, shift); + mi_assert_internal(blockidx == offset / bsize); + mi_assert_internal(blockidx < MI_MAX_BLOCKS); + size_t bitidx = (blockidx / MI_INTPTR_BITS); + size_t bit = blockidx - (bitidx * MI_INTPTR_BITS); + free_map[bitidx] |= ((uintptr_t)1 << bit); + } + mi_assert_internal(page->capacity == (free_count + page->used)); + + // walk through all blocks skipping the free ones + #if MI_DEBUG>1 + size_t used_count = 0; + #endif + uint8_t* block = pstart; + for (size_t i = 0; i < bmapsize; i++) { + if (free_map[i] == 0) { + // every block is in use + for (size_t j = 0; j < MI_INTPTR_BITS; j++) { + #if MI_DEBUG>1 + used_count++; + #endif + if (!visitor(heap, area, block, ubsize, arg)) return false; + block += bsize; + } + } + else { + // visit the used blocks in the mask + uintptr_t m = ~free_map[i]; + while (m != 0) { + #if MI_DEBUG>1 + used_count++; + #endif + size_t bitidx = mi_ctz(m); + if (!visitor(heap, area, block + (bitidx * bsize), ubsize, arg)) return false; + m &= m - 1; // clear least significant bit + } + block += bsize * MI_INTPTR_BITS; + } + } + mi_assert_internal(page->used == used_count); + return true; +} + + + +// Separate struct to keep `mi_page_t` out of the public interface +typedef struct 
mi_theap_area_ex_s { + mi_heap_area_t area; + mi_page_t* page; +} mi_theap_area_ex_t; + +typedef bool (mi_theap_area_visit_fun)(const mi_theap_t* theap, const mi_theap_area_ex_t* area, void* arg); + +static bool mi_theap_visit_areas_page(mi_theap_t* theap, mi_page_queue_t* pq, mi_page_t* page, void* vfun, void* arg) { + MI_UNUSED(theap); + MI_UNUSED(pq); + mi_theap_area_visit_fun* fun = (mi_theap_area_visit_fun*)vfun; + mi_theap_area_ex_t xarea; + xarea.page = page; + _mi_heap_area_init(&xarea.area, page); + return fun(theap, &xarea, arg); +} + +// Visit all theap pages as areas +static bool mi_theap_visit_areas(const mi_theap_t* theap, mi_theap_area_visit_fun* visitor, void* arg) { + if (visitor == NULL) return false; + return mi_theap_visit_pages((mi_theap_t*)theap, &mi_theap_visit_areas_page, (void*)(visitor), arg); // note: function pointer to void* :-{ +} + +// Just to pass arguments +typedef struct mi_visit_blocks_args_s { + bool visit_blocks; + mi_block_visit_fun* visitor; + void* arg; +} mi_visit_blocks_args_t; + +static bool mi_theap_area_visitor(const mi_theap_t* theap, const mi_theap_area_ex_t* xarea, void* arg) { + mi_visit_blocks_args_t* args = (mi_visit_blocks_args_t*)arg; + if (!args->visitor(theap->heap, &xarea->area, NULL, xarea->area.block_size, args->arg)) return false; + if (args->visit_blocks) { + return _mi_theap_area_visit_blocks(&xarea->area, xarea->page, args->visitor, args->arg); + } + else { + return true; + } +} + +// Visit all blocks in a theap +bool mi_theap_visit_blocks(const mi_theap_t* theap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) { + mi_visit_blocks_args_t args = { visit_blocks, visitor, arg }; + return mi_theap_visit_areas(theap, &mi_theap_area_visitor, &args); +} + diff --git a/3rd/mimalloc-3.2.6/src/threadlocal.c b/3rd/mimalloc-3.2.6/src/threadlocal.c new file mode 100644 index 00000000..288f623e --- /dev/null +++ b/3rd/mimalloc-3.2.6/src/threadlocal.c @@ -0,0 +1,174 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2025, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- +Implement dynamic thread local variables (for heap's). +Unlike most OS native implementations there is no limit on the number +that can be allocated. +-----------------------------------------------------------------------------*/ + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" + +/* ----------------------------------------------------------- +Each thread can have (a dynamically expanding) array of +thread-local values. 
+----------------------------------------------------------- */ + +typedef struct mi_thread_locals_s { + size_t count; + void* slots[1]; +} mi_thread_locals_t; + +static mi_thread_locals_t mi_thread_locals_empty = { 0, {NULL} }; + +mi_decl_thread mi_thread_locals_t* mi_thread_locals = &mi_thread_locals_empty; // always point to a valid `mi_thread_locals_t` + +// dynamically reallocate the thread local slots when needed +static mi_thread_locals_t* mi_thread_locals_expand(mi_thread_local_t atleast) { + mi_thread_locals_t* tls_old = mi_thread_locals; + const size_t count_old = tls_old->count; + size_t count; + if (count_old==0) { + tls_old = NULL; // so we allocate fresh from mi_thread_locals_empty + count = 16; // start with 16 slots + } else if (count_old >= 1024) { + count = count_old + 1024; // at some point increase linearly + } + else { + count = 2*count_old; // and double initially + } + if (count <= atleast) { + count = atleast + 1; + } + mi_thread_locals_t* tls = (mi_thread_locals_t*)mi_rezalloc(tls_old, sizeof(mi_thread_locals_t) + count*sizeof(void*)); + if mi_unlikely(tls==NULL) return NULL; + tls->count = count; + mi_thread_locals = tls; + return tls; +} + +static mi_decl_noinline bool mi_thread_local_set_expand( mi_thread_local_t key, void* val ) { + if (val==NULL) return true; + mi_thread_locals_t* tls = mi_thread_locals_expand(key); + if (tls==NULL) return false; + mi_assert_internal(key < tls->count); + mi_assert_internal(tls == mi_thread_locals); + tls->slots[key] = val; + return true; +} + +// set a tls slot; returns `true` if successful. +// Can return `false` if we could not reallocate the slots array. +bool _mi_thread_local_set( mi_thread_local_t key, void* val ) { + mi_thread_locals_t* tls = mi_thread_locals; + mi_assert_internal(tls!=NULL); + if mi_likely(key < tls->count) { + tls->slots[key] = val; + return true; + } + else { + return mi_thread_local_set_expand( key, val ); // tailcall + } +} + +// get a tls slot value +void* _mi_thread_local_get( mi_thread_local_t key ) { + const mi_thread_locals_t* const tls = mi_thread_locals; + mi_assert_internal(tls!=NULL); + if mi_likely(key < tls->count) { + return tls->slots[key]; + } + else { + return NULL; + } +} + +void _mi_thread_locals_thread_done(void) { + mi_thread_locals_t* const tls = mi_thread_locals; + if (tls!=NULL && tls->count > 0) { + mi_free(tls); + mi_thread_locals = &mi_thread_locals_empty; + } +} + +/* ----------------------------------------------------------- +Create and free fresh TLS key's +----------------------------------------------------------- */ +#include "bitmap.h" + +static mi_lock_t mi_thread_locals_lock; // we need a lock in order to re-allocate the slot bits +static mi_bitmap_t* mi_thread_locals_free; // reuse an arena bitmap to track which slots were assigned (1=free, 0=in-use) + +void _mi_thread_locals_init(void) { + mi_lock_init(&mi_thread_locals_lock); +} + +void _mi_thread_locals_done(void) { + mi_lock(&mi_thread_locals_lock) { + mi_bitmap_t* const slots = mi_thread_locals_free; + mi_free(slots); + } + mi_lock_done(&mi_thread_locals_lock); +} + +// strange signature but allows us to reuse the arena code for claiming free pages +static bool mi_thread_local_claim(size_t _slice_index, mi_arena_t* _arena, bool* keep_set) { + MI_UNUSED(_slice_index); MI_UNUSED(_arena); + *keep_set = false; + return true; +} + +static mi_thread_local_t mi_thread_local_create_expand(void) { + size_t key = 0; + mi_bitmap_t* slots = mi_thread_locals_free; + // 1024 bits at a time + const size_t oldcount = 
(slots==NULL ? 0 : mi_bitmap_max_bits(slots)); + const size_t newcount = 1024 + oldcount; + const size_t newsize = mi_bitmap_size( newcount, NULL ); + slots = (mi_bitmap_t*)mi_realloc_aligned(slots, newsize, MI_BCHUNK_SIZE); + if (slots != NULL) { + mi_bitmap_init(slots, newcount, true /* or otherwise we would zero all old entries */); + mi_bitmap_unsafe_setN(slots, oldcount, newcount - oldcount); + mi_thread_locals_free = slots; + size_t idx = 0; + if mi_likely(slots!=NULL && mi_bitmap_try_find_and_claim(slots,0,&idx,&mi_thread_local_claim,NULL)) { + key = idx+1; + } + } + return key; +} + +// create a fresh key +mi_thread_local_t _mi_thread_local_create(void) { + mi_thread_local_t key = 0; + mi_lock(&mi_thread_locals_lock) { + mi_bitmap_t* slots = mi_thread_locals_free; + size_t idx = 0; + if mi_likely(slots!=NULL && mi_bitmap_try_find_and_claim(slots,0,&idx,&mi_thread_local_claim,NULL)) { + key = idx+1; + } + else { + key = mi_thread_local_create_expand(); + } + } + return key; +} + +// free a key +void _mi_thread_local_free(mi_thread_local_t key) { + if (key==0) return; + const size_t idx = key-1; + mi_lock(&mi_thread_locals_lock) { + mi_bitmap_t* const slots = mi_thread_locals_free; + if (slots!=NULL && idx < mi_bitmap_max_bits(slots)) { + mi_bitmap_set(slots,key-1); + } + } +} + diff --git a/3rd/mimalloc-2.0.9/test/CMakeLists.txt b/3rd/mimalloc-3.2.6/test/CMakeLists.txt similarity index 86% rename from 3rd/mimalloc-2.0.9/test/CMakeLists.txt rename to 3rd/mimalloc-3.2.6/test/CMakeLists.txt index e76ffa64..199f4809 100644 --- a/3rd/mimalloc-2.0.9/test/CMakeLists.txt +++ b/3rd/mimalloc-3.2.6/test/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 3.18) project(mimalloc-test C CXX) set(CMAKE_C_STANDARD 11) @@ -16,10 +16,12 @@ if (NOT CMAKE_BUILD_TYPE) endif() # Import mimalloc (if installed) -find_package(mimalloc 2.0 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) +find_package(mimalloc CONFIG REQUIRED) message(STATUS "Found mimalloc installed at: ${MIMALLOC_LIBRARY_DIR} (${MIMALLOC_VERSION_DIR})") -# overriding with a dynamic library + +# link with a dynamic shared library +# use `LD_PRELOAD` to actually override malloc/free at runtime with mimalloc add_executable(dynamic-override main-override.c) target_link_libraries(dynamic-override PUBLIC mimalloc) @@ -29,9 +31,9 @@ target_link_libraries(dynamic-override-cxx PUBLIC mimalloc) # overriding with a static object file works reliable as the symbols in the # object file have priority over those in library files -add_executable(static-override-obj main-override.c ${MIMALLOC_OBJECT_DIR}/mimalloc.o) +add_executable(static-override-obj main-override.c ${MIMALLOC_OBJECT_DIR}/mimalloc${CMAKE_C_OUTPUT_EXTENSION}) target_include_directories(static-override-obj PUBLIC ${MIMALLOC_INCLUDE_DIR}) -target_link_libraries(static-override-obj PUBLIC pthread) +target_link_libraries(static-override-obj PUBLIC mimalloc-static) # overriding with a static library works too if using the `mimalloc-override.h` diff --git a/3rd/mimalloc-3.2.6/test/main-override-dep.cpp b/3rd/mimalloc-3.2.6/test/main-override-dep.cpp new file mode 100644 index 00000000..772e3702 --- /dev/null +++ b/3rd/mimalloc-3.2.6/test/main-override-dep.cpp @@ -0,0 +1,60 @@ +// Issue #981: test overriding allocation in a DLL that is compiled independent of mimalloc. +// This is imported by the `mimalloc-test-override` project. 
+#include +#include +#include "main-override-dep.h" + +std::string TestAllocInDll::GetString() +{ + char* test = new char[128]; + memset(test, 0, 128); + const char* t = "test"; + memcpy(test, t, 4); + std::string r = test; + std::cout << "override-dep: GetString: " << r << "\n"; + delete[] test; + return r; +} + +#include + +void TestAllocInDll::TestHeapAlloc() +{ + HANDLE theap = GetProcessHeap(); + int* p = (int*)HeapAlloc(theap, 0, sizeof(int)); + *p = 42; + HeapFree(theap, 0, p); +} + +class Static { +private: + void* p; +public: + Static() { + printf("override-dep: static constructor\n"); + p = malloc(64); + return; + } + ~Static() { + free(p); + printf("override-dep: static destructor\n"); + return; + } +}; + +static Static s = Static(); + + +#include + +BOOL WINAPI DllMain(HINSTANCE module, DWORD reason, LPVOID reserved) { + (void)(reserved); + (void)(module); + if (reason==DLL_PROCESS_ATTACH) { + printf("override-dep: dll attach\n"); + } + else if (reason==DLL_PROCESS_DETACH) { + printf("override-dep: dll detach\n"); + } + return TRUE; +} diff --git a/3rd/mimalloc-3.2.6/test/main-override-dep.h b/3rd/mimalloc-3.2.6/test/main-override-dep.h new file mode 100644 index 00000000..9d4aabfd --- /dev/null +++ b/3rd/mimalloc-3.2.6/test/main-override-dep.h @@ -0,0 +1,12 @@ +#pragma once +// Issue #981: test overriding allocation in a DLL that is compiled independent of mimalloc. +// This is imported by the `mimalloc-test-override` project. + +#include + +class TestAllocInDll +{ +public: + __declspec(dllexport) std::string GetString(); + __declspec(dllexport) void TestHeapAlloc(); +}; diff --git a/3rd/mimalloc-2.0.9/test/main-override-static.c b/3rd/mimalloc-3.2.6/test/main-override-static.c similarity index 64% rename from 3rd/mimalloc-2.0.9/test/main-override-static.c rename to 3rd/mimalloc-3.2.6/test/main-override-static.c index 534c8849..bf5e5449 100644 --- a/3rd/mimalloc-2.0.9/test/main-override-static.c +++ b/3rd/mimalloc-3.2.6/test/main-override-static.c @@ -1,3 +1,6 @@ +#if _WIN32 +#include +#endif #include #include #include @@ -7,11 +10,13 @@ #include #include // redefines malloc etc. 
+static void mi_bins(void); static void double_free1(); static void double_free2(); static void corrupt_free(); static void block_overflow1(); +static void block_overflow2(); static void invalid_free(); static void test_aslr(void); static void test_process_info(void); @@ -19,25 +24,34 @@ static void test_reserved(void); static void negative_stat(void); static void alloc_huge(void); static void test_heap_walk(void); -static void test_heap_arena(void); +static void test_canary_leak(void); +static void test_manage_os_memory(void); +// static void test_large_pages(void); + int main() { mi_version(); mi_stats_reset(); - // detect double frees and heap corruption + + // mi_bins(); + + // test_manage_os_memory(); + // test_large_pages(); + // detect double frees and theap corruption // double_free1(); // double_free2(); // corrupt_free(); // block_overflow1(); + // block_overflow2(); + test_canary_leak(); // test_aslr(); // invalid_free(); // test_reserved(); // negative_stat(); - // test_heap_walk(); + // test_theap_walk(); // alloc_huge(); - // test_heap_walk(); - // test_heap_arena(); - + + void* p1 = malloc(78); void* p2 = malloc(24); free(p1); @@ -45,15 +59,15 @@ int main() { char* s = strdup("hello\n"); free(p2); - mi_heap_t* h = mi_heap_new(); - mi_heap_set_default(h); + // mi_theap_t* h = mi_theap_new(); + // mi_theap_set_default(h); p2 = malloc(16); p1 = realloc(p1, 32); free(p1); free(p2); free(s); - + /* now test if override worked by allocating/freeing across the api's*/ //p1 = mi_malloc(32); //free(p1); @@ -64,13 +78,13 @@ int main() { //mi_stats_print(NULL); // test_process_info(); - + return 0; } static void invalid_free() { free((void*)0xBADBEEF); - realloc((void*)0xBADBEEF,10); + realloc((void*)0xBADBEEF, 10); } static void block_overflow1() { @@ -79,6 +93,12 @@ static void block_overflow1() { free(p); } +static void block_overflow2() { + uint8_t* p = (uint8_t*)mi_malloc(16); + p[17] = 0; + free(p); +} + // The double free samples come ArcHeap [1] by Insu Yun (issue #161) // [1]: https://arxiv.org/pdf/1903.00503.pdf @@ -117,7 +137,7 @@ static void double_free2() { } -// Try to corrupt the heap through buffer overflow +// Try to corrupt the theap through buffer overflow #define N 256 #define SZ 64 @@ -162,7 +182,7 @@ static void test_process_info(void) { size_t peak_commit = 0; size_t page_faults = 0; for (int i = 0; i < 100000; i++) { - void* p = calloc(100,10); + void* p = calloc(100, 10); free(p); } mi_process_info(&elapsed, &user_msecs, &system_msecs, &current_rss, &peak_rss, &current_commit, &peak_commit, &page_faults); @@ -173,7 +193,7 @@ static void test_reserved(void) { #define KiB 1024ULL #define MiB (KiB*KiB) #define GiB (MiB*KiB) - mi_reserve_os_memory(4*GiB, false, true); + mi_reserve_os_memory(3*GiB, false, true); void* p1 = malloc(100); void* p2 = malloc(100000); void* p3 = malloc(2*GiB); @@ -219,20 +239,78 @@ static void test_heap_walk(void) { mi_heap_visit_blocks(heap, true, &test_visit, NULL); } -static void test_heap_arena(void) { +static void test_canary_leak(void) { + char* p = mi_mallocn_tp(char, 22); + for (int i = 0; i < 22; i++) { + p[i] = '0'+i; + } + puts(p); + free(p); +} + +#if _WIN32 +static void test_manage_os_memory(void) { + size_t size = 256 * 1024 * 1024; + void* ptr = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); mi_arena_id_t arena_id; - int err = mi_reserve_os_memory_ex(100 * 1024 * 1024, false /* commit */, false /* allow large */, true /* exclusive */, &arena_id); - if (err) abort(); - mi_heap_t* heap = 
mi_heap_new_in_arena(arena_id); - for (int i = 0; i < 500000; i++) { - void* p = mi_heap_malloc(heap, 1024); - if (p == NULL) { - printf("out of memory after %d kb (expecting about 100_000kb)\n", i); - break; - } + mi_manage_os_memory_ex(ptr, size, true /* committed */, true /* pinned */, false /* is zero */, -1 /* numa node */, true /* exclusive */, &arena_id); + mi_heap_t* cuda_theap = mi_heap_new_in_arena(arena_id); // you can do this in any thread + + // now allocate only in the cuda arena + void* p1 = mi_heap_malloc(cuda_theap, 8); + int* p2 = mi_heap_malloc_tp(int,cuda_theap); + *p2 = 42; + + // and maybe set the cuda theap as the default theap? (but careful as now `malloc` will allocate in the cuda theap as well) + { + mi_theap_t* prev_default_theap = mi_theap_set_default(mi_heap_theap(cuda_theap)); + void* p3 = mi_malloc(8); // allocate in the cuda theap + mi_free(p3); } + mi_free(p1); + mi_free(p2); +} +#else +static void test_manage_os_memory(void) { + // empty +} +#endif + +// Experiment with huge OS pages +#if 0 + +#include +#include +#include +#include + +static void test_large_pages(void) { + mi_memid_t memid; + +#if 0 + size_t pages_reserved; + size_t page_size; + uint8_t* p = (uint8_t*)_mi_os_alloc_huge_os_pages(1, -1, 30000, &pages_reserved, &page_size, &memid); + const size_t req_size = pages_reserved * page_size; +#else + const size_t req_size = 64*MI_MiB; + uint8_t* p = (uint8_t*)_mi_os_alloc(req_size, &memid, NULL); +#endif + + p[0] = 1; + + //_mi_os_protect(p, _mi_os_page_size()); + //_mi_os_unprotect(p, _mi_os_page_size()); + //_mi_os_decommit(p, _mi_os_page_size(), NULL); + if (madvise(p, req_size, MADV_HUGEPAGE) == 0) { + printf("advised huge pages\n"); + _mi_os_decommit(p, _mi_os_page_size(), NULL); + }; + _mi_os_free(p, req_size, memid, NULL); } +#endif + // ---------------------------- // bin size experiments // ------------------------------ @@ -240,8 +318,8 @@ static void test_heap_arena(void) { #if 0 #include #include +#include -#define MI_INTPTR_SIZE 8 #define MI_LARGE_WSIZE_MAX (4*1024*1024 / MI_INTPTR_SIZE) #define MI_BIN_HUGE 100 @@ -251,11 +329,11 @@ static void test_heap_arena(void) { static inline uint8_t mi_bsr32(uint32_t x); #if defined(_MSC_VER) -#include +//#include #include static inline uint8_t mi_bsr32(uint32_t x) { uint32_t idx; - _BitScanReverse((DWORD*)&idx, x); + _BitScanReverse(&idx, x); return idx; } #elif defined(__GNUC__) || defined(__clang__) @@ -279,7 +357,7 @@ static inline uint8_t mi_bsr32(uint32_t x) { } #endif -/* + // Bit scan reverse: return the index of the highest bit. uint8_t _mi_bsr(uintptr_t x) { if (x == 0) return 0; @@ -292,38 +370,39 @@ uint8_t _mi_bsr(uintptr_t x) { # error "define bsr for non-32 or 64-bit platforms" #endif } -*/ - static inline size_t _mi_wsize_from_size(size_t size) { return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); } +// #define MI_ALIGN2W + // Return the bin for a given field size. // Returns MI_BIN_HUGE if the size is too large. // We use `wsize` for the size in "machine word sizes", // i.e. byte size == `wsize*sizeof(void*)`. -extern inline uint8_t _mi_bin8(size_t size) { - size_t wsize = _mi_wsize_from_size(size); - uint8_t bin; - if (wsize <= 1) { +static inline size_t mi_bin(size_t wsize) { + // size_t wsize = _mi_wsize_from_size(size); + // size_t bin; + /*if (wsize <= 1) { bin = 1; } + */ #if defined(MI_ALIGN4W) - else if (wsize <= 4) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + if (wsize <= 4) { + return (wsize <= 1 ? 
1 : (wsize+1)&~1); // round to double word sizes } #elif defined(MI_ALIGN2W) - else if (wsize <= 8) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + if (wsize <= 8) { + return (wsize <= 1 ? 1 : (wsize+1)&~1); // round to double word sizes } #else - else if (wsize <= 8) { - bin = (uint8_t)wsize; + if (wsize <= 8) { + return (wsize == 0 ? 1 : wsize); } #endif else if (wsize > MI_LARGE_WSIZE_MAX) { - bin = MI_BIN_HUGE; + return MI_BIN_HUGE; } else { #if defined(MI_ALIGN4W) @@ -331,15 +410,19 @@ extern inline uint8_t _mi_bin8(size_t size) { #endif wsize--; // find the highest bit - uint8_t b = mi_bsr32((uint32_t)wsize); + size_t idx; + mi_bsr(wsize, &idx); + uint8_t b = (uint8_t)idx; // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). // - adjust with 3 because we use do not round the first 8 sizes // which each get an exact bin - bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; + const size_t bin = ((b << 2) + ((wsize >> (b - 2)) & 0x03)) - 3; + assert(bin > 0 && bin < MI_BIN_HUGE); + return bin; } - return bin; } + static inline uint8_t _mi_bin4(size_t size) { size_t wsize = _mi_wsize_from_size(size); uint8_t bin; @@ -363,44 +446,79 @@ static inline uint8_t _mi_bin4(size_t size) { bin = MI_BIN_HUGE; } else { - uint8_t b = mi_bsr32((uint32_t)wsize); + size_t idx; + mi_bsr(wsize, &idx); + uint8_t b = (uint8_t)idx; bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3; } return bin; } -static size_t _mi_binx4(size_t bsize) { - if (bsize==0) return 0; - uint8_t b = mi_bsr32((uint32_t)bsize); - if (b <= 1) return bsize; - size_t bin = ((b << 1) | (bsize >> (b - 1))&0x01); +static size_t _mi_binx4(size_t wsize) { + size_t bin; + if (wsize <= 1) { + bin = 1; + } + else if (wsize <= 8) { + // bin = (wsize+1)&~1; // round to double word sizes + bin = (uint8_t)wsize; + } + else { + size_t idx; + mi_bsr(wsize, &idx); + uint8_t b = (uint8_t)idx; + if (b <= 1) return wsize; + bin = ((b << 1) | (wsize >> (b - 1))&0x01) + 3; + } return bin; } static size_t _mi_binx8(size_t bsize) { if (bsize<=1) return bsize; - uint8_t b = mi_bsr32((uint32_t)bsize); + size_t idx; + mi_bsr(bsize, &idx); + uint8_t b = (uint8_t)idx; if (b <= 2) return bsize; size_t bin = ((b << 2) | (bsize >> (b - 2))&0x03) - 5; return bin; } + +static inline size_t mi_binx(size_t wsize) { + uint8_t bin; + if (wsize <= 1) { + bin = 1; + } + else if (wsize <= 8) { + // bin = (wsize+1)&~1; // round to double word sizes + bin = (uint8_t)wsize; + } + else { + wsize--; + assert(wsize>0); + // find the highest bit + uint8_t b = (uint8_t)(MI_SIZE_BITS - 1 - mi_clz(wsize)); + + // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). 
+ // - adjust with 3 because we use do not round the first 8 sizes + // which each get an exact bin + bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; + } + return bin; +} + + static void mi_bins(void) { //printf(" QNULL(1), /* 0 */ \\\n "); size_t last_bin = 0; - size_t min_bsize = 0; - size_t last_bsize = 0; - for (size_t bsize = 1; bsize < 2*1024; bsize++) { - size_t size = bsize * 64 * 1024; - size_t bin = _mi_binx8(bsize); + for (size_t wsize = 1; wsize <= (4*1024*1024) / 8 + 1024; wsize++) { + size_t bin = mi_bin(wsize); if (bin != last_bin) { - printf("min bsize: %6zd, max bsize: %6zd, bin: %6zd\n", min_bsize, last_bsize, last_bin); - //printf("QNULL(%6zd), ", wsize); - //if (last_bin%8 == 0) printf("/* %i */ \\\n ", last_bin); + //printf("min bsize: %6zd, max bsize: %6zd, bin: %6zd\n", min_wsize, last_wsize, last_bin); + printf("QNULL(%6zd), ", wsize-1); + if (last_bin%8 == 0) printf("/* %zu */ \\\n ", last_bin); last_bin = bin; - min_bsize = bsize; } - last_bsize = bsize; } } #endif diff --git a/3rd/mimalloc-2.0.9/test/main-override.c b/3rd/mimalloc-3.2.6/test/main-override.c similarity index 100% rename from 3rd/mimalloc-2.0.9/test/main-override.c rename to 3rd/mimalloc-3.2.6/test/main-override.c diff --git a/3rd/mimalloc-3.2.6/test/main-override.cpp b/3rd/mimalloc-3.2.6/test/main-override.cpp new file mode 100644 index 00000000..67e044a7 --- /dev/null +++ b/3rd/mimalloc-3.2.6/test/main-override.cpp @@ -0,0 +1,542 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#include +static void msleep(unsigned long msecs) { Sleep(msecs); } +#else +#include +static void msleep(unsigned long msecs) { usleep(msecs * 1000UL); } +#endif + +static void theap_thread_free_large(); // issue #221 +static void theap_no_delete(); // issue #202 +static void theap_late_free(); // issue #204 +static void padding_shrink(); // issue #209 +static void various_tests(); +static void test_mt_shutdown(); +static void fail_aslr(); // issue #372 +static void tsan_numa_test(); // issue #414 +static void strdup_test(); // issue #445 +static void theap_thread_free_huge(); +static void test_std_string(); // issue #697 +static void test_thread_local(); // issue #944 +// static void test_mixed0(); // issue #942 +static void test_mixed1(); // issue #942 +static void test_stl_allocators(); +static void test_join(); // issue #1177 +static void test_thread_leak(void); // issue #1104 +static void test_perf(void); // issue #1104 + + +#if _WIN32 +#include "main-override-dep.h" +static void test_dep(); // issue #981: test overriding in another DLL +#else +static void test_dep() { }; +#endif + +int main() { + mi_stats_reset(); // ignore earlier allocations + //various_tests(); + //test_mixed1(); + + // test_dep(); + // test_join(); + + // test_thread_leak(); + test_perf(); + + //test_std_string(); + //test_thread_local(); + // theap_thread_free_huge(); + /* + theap_thread_free_large(); + theap_no_delete(); + theap_late_free(); + padding_shrink(); + + tsan_numa_test(); + */ + /* + strdup_test(); + test_stl_allocators(); + test_mt_shutdown(); + */ + //fail_aslr(); + mi_stats_print(NULL); + return 0; +} + +static void* p = malloc(8); + +void free_p() { + free(p); + return; +} + +class Test { +private: + int i; +public: + Test(int x) { i = x; } + ~Test() { } +}; + + +static void various_tests() { + atexit(free_p); + void* p1 = malloc(78); + void* p2 = mi_malloc_aligned(24, 16); + free(p1); + p1 = 
malloc(8); + char* s = mi_strdup("hello\n"); + + mi_free(p2); + p2 = malloc(16); + p1 = realloc(p1, 32); + free(p1); + free(p2); + mi_free(s); + + Test* t = new Test(42); + delete t; + t = new (std::nothrow) Test(42); + delete t; + auto tbuf = new unsigned char[sizeof(Test)]; + t = new (tbuf) Test(42); + t->~Test(); + delete[] tbuf; + + #if _WIN32 + const char* ptr = ::_Getdays(); // test _base overrid + free((void*)ptr); + #endif +} + +class Static { +private: + void* p; +public: + Static() { + p = malloc(64); + return; + } + ~Static() { + free(p); + return; + } +}; + +static Static s = Static(); + + +static bool test_stl_allocator1() { + std::vector > vec; + vec.push_back(1); + vec.pop_back(); + return vec.size() == 0; +} + +struct some_struct { int i; int j; double z; }; + + +#if _WIN32 +static void test_dep() +{ + TestAllocInDll t; + std::string s = t.GetString(); + std::cout << "test_dep GetString: " << s << "\n"; + t.TestHeapAlloc(); +} +#endif + + +static bool test_stl_allocator2() { + std::vector > vec; + vec.push_back(some_struct()); + vec.pop_back(); + return vec.size() == 0; +} + +#if MI_HAS_HEAP_STL_ALLOCATOR +static bool test_stl_allocator3() { + std::vector > vec; + vec.push_back(1); + vec.pop_back(); + return vec.size() == 0; +} + +static bool test_stl_allocator4() { + std::vector > vec; + vec.push_back(some_struct()); + vec.pop_back(); + return vec.size() == 0; +} + +static bool test_stl_allocator5() { + std::vector > vec; + vec.push_back(1); + vec.pop_back(); + return vec.size() == 0; +} + +static bool test_stl_allocator6() { + std::vector > vec; + vec.push_back(some_struct()); + vec.pop_back(); + return vec.size() == 0; +} +#endif + +static void test_stl_allocators() { + test_stl_allocator1(); + test_stl_allocator2(); +#if MI_HAS_HEAP_STL_ALLOCATOR + test_stl_allocator3(); + test_stl_allocator4(); + test_stl_allocator5(); + test_stl_allocator6(); +#endif +} + +#if 0 +#include +#include +#include +#include +#include +#include + +static void test_mixed0() { + std::vector> numbers(1024 * 1024 * 100); + std::vector threads(1); + + std::atomic index{}; + + auto start = std::chrono::system_clock::now(); + + for (auto& thread : threads) { + thread = std::thread{[&index, &numbers]() { + while (true) { + auto i = index.fetch_add(1, std::memory_order_relaxed); + if (i >= numbers.size()) return; + + numbers[i] = std::make_unique(i); + } + }}; + } + + for (auto& thread : threads) thread.join(); + + auto end = std::chrono::system_clock::now(); + + auto duration = + std::chrono::duration_cast(end - start); + std::cout << "Running on " << threads.size() << " threads took " << duration + << std::endl; +} +#endif + +void asd() { + void* p = malloc(128); + free(p); +} +static void test_mixed1() { + std::thread thread(asd); + thread.join(); +} + +#if 0 +// issue #691 +static char* cptr; + +static void* thread1_allocate() +{ + cptr = mi_calloc_tp(char,22085632); + return NULL; +} + +static void* thread2_free() +{ + assert(cptr); + mi_free(cptr); + cptr = NULL; + return NULL; +} + +static void test_large_migrate(void) { + auto t1 = std::thread(thread1_allocate); + t1.join(); + auto t2 = std::thread(thread2_free); + t2.join(); + /* + pthread_t thread1, thread2; + + pthread_create(&thread1, NULL, &thread1_allocate, NULL); + pthread_join(thread1, NULL); + + pthread_create(&thread2, NULL, &thread2_free, NULL); + pthread_join(thread2, NULL); + */ + return; +} +#endif + +// issue 445 +static void strdup_test() { +#ifdef _MSC_VER + char* s = _strdup("hello\n"); + char* buf = NULL; + size_t len; + 
_dupenv_s(&buf, &len, "MIMALLOC_VERBOSE"); + mi_free(buf); + mi_free(s); +#endif +} + +// Issue #202 +static void heap_no_delete_worker() { + mi_heap_t* heap = mi_heap_new(); + void* q = mi_heap_malloc(heap, 1024); (void)(q); + // mi_heap_delete(heap); // uncomment to prevent assertion +} + +static void heap_no_delete() { + auto t1 = std::thread(heap_no_delete_worker); + t1.join(); +} + + +// Issue #697 +static void test_std_string() { + std::string path = "/Users/xxxx/Library/Developer/Xcode/DerivedData/xxxxxxxxxx/Build/Intermediates.noindex/xxxxxxxxxxx/arm64/XX_lto.o/0.arm64.lto.o"; + std::string path1 = "/Users/xxxx/Library/Developer/Xcode/DerivedData/xxxxxxxxxx/Build/Intermediates.noindex/xxxxxxxxxxx/arm64/XX_lto.o/1.arm64.lto.o"; + std::cout << path + "\n>>> " + path1 + "\n>>> " << std::endl; +} + +// Issue #204 +static volatile void* global_p; + +static void t1main() { + mi_heap_t* heap = mi_heap_new(); + global_p = mi_heap_malloc(heap, 1024); + mi_heap_delete(heap); +} + +static void theap_late_free() { + auto t1 = std::thread(t1main); + + msleep(2000); + assert(global_p); + mi_free((void*)global_p); + + t1.join(); +} + +// issue #209 +static void* shared_p; +static void alloc0(/* void* arg */) +{ + shared_p = mi_malloc(8); +} + +static void padding_shrink(void) +{ + auto t1 = std::thread(alloc0); + t1.join(); + mi_free(shared_p); +} + + +// Issue #221 +static void theap_thread_free_large_worker() { + mi_free(shared_p); +} + +static void theap_thread_free_large() { + for (int i = 0; i < 100; i++) { + shared_p = mi_malloc_aligned(2*1024*1024 + 1, 8); + auto t1 = std::thread(theap_thread_free_large_worker); + t1.join(); + } +} + +static void theap_thread_free_huge_worker() { + mi_free(shared_p); +} + +static void theap_thread_free_huge() { + for (int i = 0; i < 10; i++) { + shared_p = mi_malloc(1024 * 1024 * 1024); + auto t1 = std::thread(theap_thread_free_huge_worker); + t1.join(); + } +} + +static std::atomic xgsum; + +static void local_alloc() { + long sum = 0; + for(int i = 0; i < 1000000; i++) { + const int n = 1 + std::rand() % 1000; + uint8_t* p = (uint8_t*)calloc(n, 1); + p[0] = 1; + sum += p[std::rand() % n]; + if ((std::rand() % 100) > 24) { + free(p); + } + } + xgsum += sum; +} + +static void test_thread_leak() { + std::vector threads; + for (int i=1; i<=100; ++i) { + threads.emplace_back(std::thread(&local_alloc)); + } + for (auto& th : threads) { + th.join(); + } +} + +static void test_mt_shutdown() +{ + const int threads = 5; + std::vector< std::future< std::vector< char* > > > ts; + + auto fn = [&]() + { + std::vector< char* > ps; + ps.reserve(1000); + for (int i = 0; i < 1000; i++) + ps.emplace_back(new char[1]); + return ps; + }; + + for (int i = 0; i < threads; i++) + ts.emplace_back(std::async(std::launch::async, fn)); + + for (auto& f : ts) + for (auto& p : f.get()) + delete[] p; + + std::cout << "done" << std::endl; +} + +// issue #372 +static void fail_aslr() { + size_t sz = (size_t)(4ULL << 40); // 4TiB + void* p = malloc(sz); + printf("pointer p: %p: area up to %p\n", p, (uint8_t*)p + sz); + *(int*)0x5FFFFFFF000 = 0; // should segfault +} + +// issues #414 +static void dummy_worker() { + void* p = mi_malloc(0); + mi_free(p); +} + +static void tsan_numa_test() { + auto t1 = std::thread(dummy_worker); + dummy_worker(); + t1.join(); +} + + +class MTest +{ + char *data; +public: + MTest() { data = (char*)malloc(1024); } + ~MTest() { free(data); }; +}; + +thread_local MTest tlVariable; + +void threadFun( int i ) +{ + printf( "Thread %d\n", i ); + 
std::this_thread::sleep_for( std::chrono::milliseconds(100) ); +} + +void test_thread_local() +{ + for( int i=1; i < 100; ++i ) + { + std::thread t( threadFun, i ); + t.join(); + mi_stats_print(NULL); + } + return; +} + +// issue #1177 +thread_local void* s_ptr = mi_malloc(1); + +void test_join() { + std::thread thread([]() { mi_free(s_ptr); }); + thread.join(); + mi_free(s_ptr); +} + + +static std::atomic gsum; + +const int LEN[] = { 1000, 5000, 10000, 50000 }; + +// adapted from example in +// https://github.com/microsoft/mimalloc/issues/1104 + +static void test_perf_local_alloc() +{ + // thread-local random number generator + std::minstd_rand rng(std::random_device{}()); + + long sum = 0; + for (int i = 0; i < 1000000; i++) + { + int len = LEN[rng() % 4]; + int* p = (int*)mi_zalloc_aligned(len * sizeof(int), alignof(int)); + p[0] = 1; + sum += p[rng() % len]; + free(p); + } + std::cout << "."; + gsum += sum; +} + +static void test_perf_run() +{ + std::vector threads; + for (int i = 0; i < 24; ++i) + { + threads.emplace_back(std::thread(&test_perf_local_alloc)); + } + for (auto& th : threads) + { + th.join(); + } + std::cout << "\n"; +} + +void test_perf(void) +{ + test_perf_run(); + std::cout << "gsum: " << gsum.load() << "\n"; +} diff --git a/3rd/mimalloc-2.0.9/test/main.c b/3rd/mimalloc-3.2.6/test/main.c similarity index 71% rename from 3rd/mimalloc-2.0.9/test/main.c rename to 3rd/mimalloc-3.2.6/test/main.c index b148f712..d18d0aa9 100644 --- a/3rd/mimalloc-2.0.9/test/main.c +++ b/3rd/mimalloc-3.2.6/test/main.c @@ -2,13 +2,13 @@ #include #include -void test_heap(void* p_out) { - mi_heap_t* heap = mi_heap_new(); - void* p1 = mi_heap_malloc(heap,32); - void* p2 = mi_heap_malloc(heap,48); +void test_theap(void* p_out) { + mi_theap_t* theap = mi_theap_new(); + void* p1 = mi_theap_malloc(theap,32); + void* p2 = mi_theap_malloc(theap,48); mi_free(p_out); - mi_heap_destroy(heap); - //mi_heap_delete(heap); mi_free(p1); mi_free(p2); + mi_theap_destroy(theap); + //mi_theap_delete(theap); mi_free(p1); mi_free(p2); } void test_large() { @@ -32,7 +32,7 @@ int main() { mi_free(p1); mi_free(p2); - test_heap(mi_malloc(32)); + test_theap(mi_malloc(32)); p1 = mi_malloc_aligned(64, 16); p2 = mi_malloc_aligned(160,24); diff --git a/3rd/mimalloc-2.0.9/test/readme.md b/3rd/mimalloc-3.2.6/test/readme.md similarity index 100% rename from 3rd/mimalloc-2.0.9/test/readme.md rename to 3rd/mimalloc-3.2.6/test/readme.md diff --git a/3rd/mimalloc-2.0.9/test/test-api-fill.c b/3rd/mimalloc-3.2.6/test/test-api-fill.c similarity index 98% rename from 3rd/mimalloc-2.0.9/test/test-api-fill.c rename to 3rd/mimalloc-3.2.6/test/test-api-fill.c index 85d8524f..eebbd394 100644 --- a/3rd/mimalloc-2.0.9/test/test-api-fill.c +++ b/3rd/mimalloc-3.2.6/test/test-api-fill.c @@ -5,7 +5,7 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
-----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-types.h" +#include "mimalloc/types.h" #include "testhelper.h" @@ -164,7 +164,7 @@ int main(void) { mi_free(p); }; -#if MI_DEBUG >= 2 +#if (MI_DEBUG >= 2) && !MI_TSAN // --------------------------------------------------- // Debug filling // --------------------------------------------------- @@ -271,7 +271,7 @@ int main(void) { mi_free(p); }; - + #if !(MI_TRACK_VALGRIND || MI_TRACK_ASAN || MI_GUARDED) CHECK_BODY("fill-freed-small") { size_t malloc_size = MI_SMALL_SIZE_MAX / 2; uint8_t* p = (uint8_t*)mi_malloc(malloc_size); @@ -286,6 +286,7 @@ int main(void) { // First sizeof(void*) bytes will contain housekeeping data, skip these result = check_debug_fill_freed(p + sizeof(void*), malloc_size - sizeof(void*)); }; + #endif #endif // --------------------------------------------------- @@ -309,7 +310,7 @@ bool check_zero_init(uint8_t* p, size_t size) { #if MI_DEBUG >= 2 bool check_debug_fill_uninit(uint8_t* p, size_t size) { -#if MI_VALGRIND +#if MI_TRACK_VALGRIND || MI_TRACK_ASAN (void)p; (void)size; return true; // when compiled with valgrind we don't init on purpose #else @@ -325,7 +326,7 @@ bool check_debug_fill_uninit(uint8_t* p, size_t size) { } bool check_debug_fill_freed(uint8_t* p, size_t size) { -#if MI_VALGRIND +#if MI_TRACK_VALGRIND (void)p; (void)size; return true; // when compiled with valgrind we don't fill on purpose #else diff --git a/3rd/mimalloc-2.0.9/test/test-api.c b/3rd/mimalloc-3.2.6/test/test-api.c similarity index 54% rename from 3rd/mimalloc-2.0.9/test/test-api.c rename to 3rd/mimalloc-3.2.6/test/test-api.c index 884a433b..7ddfef12 100644 --- a/3rd/mimalloc-2.0.9/test/test-api.c +++ b/3rd/mimalloc-3.2.6/test/test-api.c @@ -33,25 +33,49 @@ we therefore test the API over various inputs. 
Please add more tests :-) #endif #include "mimalloc.h" -// #include "mimalloc-internal.h" -#include "mimalloc-types.h" // for MI_DEBUG and MI_ALIGNMENT_MAX +// #include "mimalloc/internal.h" +#include "mimalloc/types.h" // for MI_DEBUG and MI_PAGE_MAX_OVERALLOC_ALIGN #include "testhelper.h" // --------------------------------------------------------------------------- // Test functions // --------------------------------------------------------------------------- -bool test_heap1(void); -bool test_heap2(void); +bool test_theap1(void); +bool test_theap2(void); +bool test_theap_arena_destroy(void); +bool test_theap_arena_delete(void); bool test_stl_allocator1(void); bool test_stl_allocator2(void); +bool test_stl_theap_allocator1(void); +bool test_stl_theap_allocator2(void); +bool test_stl_theap_allocator3(void); +bool test_stl_theap_allocator4(void); + +bool mem_is_zero(uint8_t* p, size_t size) { + if (p==NULL) return false; + for (size_t i = 0; i < size; ++i) { + if (p[i] != 0) return false; + } + return true; +} + // --------------------------------------------------------------------------- // Main testing // --------------------------------------------------------------------------- int main(void) { mi_option_disable(mi_option_verbose); + CHECK_BODY("malloc-aligned9a") { // test large alignments + void* p = mi_zalloc_aligned(1024 * 1024, 2); + mi_free(p); + p = mi_zalloc_aligned(1024 * 1024, 2); + mi_free(p); + result = true; + }; + + // --------------------------------------------------- // Malloc // --------------------------------------------------- @@ -64,9 +88,14 @@ int main(void) { CHECK_BODY("malloc-nomem1") { result = (mi_malloc((size_t)PTRDIFF_MAX + (size_t)1) == NULL); }; - CHECK_BODY("malloc-null") { + CHECK_BODY("malloc-free-null") { mi_free(NULL); }; + #if MI_INTPTR_BITS > 32 + CHECK_BODY("malloc-free-invalid-low") { + mi_free((void*)(MI_ZU(0x0000000003990080))); // issue #1087 + }; + #endif CHECK_BODY("calloc-overflow") { // use (size_t)&mi_calloc to get some number without triggering compiler warnings result = (mi_calloc((size_t)&mi_calloc,SIZE_MAX/1000) == NULL); @@ -141,15 +170,17 @@ int main(void) { void* p = mi_malloc_aligned(4097,4096); size_t usable = mi_usable_size(p); result = (usable >= 4097 && usable < 16000); - printf("malloc_aligned5: usable size: %zi\n", usable); + fprintf(stderr, "malloc_aligned5: usable size: %zi. 
", usable); mi_free(p); }; + /* CHECK_BODY("malloc-aligned6") { bool ok = true; - for (size_t align = 1; align <= MI_ALIGNMENT_MAX && ok; align *= 2) { + for (size_t align = 1; align <= MI_PAGE_MAX_OVERALLOC_ALIGN && ok; align *= 2) { void* ps[8]; for (int i = 0; i < 8 && ok; i++) { - ps[i] = mi_malloc_aligned(align*5 /*size*/, align); + ps[i] = mi_malloc_aligned(align*13 // size + , align); if (ps[i] == NULL || (uintptr_t)(ps[i]) % align != 0) { ok = false; } @@ -160,26 +191,34 @@ int main(void) { } result = ok; }; + */ CHECK_BODY("malloc-aligned7") { - void* p = mi_malloc_aligned(1024,MI_ALIGNMENT_MAX); + void* p = mi_malloc_aligned(1024,MI_PAGE_MAX_OVERALLOC_ALIGN); mi_free(p); - result = ((uintptr_t)p % MI_ALIGNMENT_MAX) == 0; + result = ((uintptr_t)p % MI_PAGE_MAX_OVERALLOC_ALIGN) == 0; }; CHECK_BODY("malloc-aligned8") { bool ok = true; for (int i = 0; i < 5 && ok; i++) { int n = (1 << i); - void* p = mi_malloc_aligned(1024, n * MI_ALIGNMENT_MAX); - ok = ((uintptr_t)p % (n*MI_ALIGNMENT_MAX)) == 0; + void* p = mi_malloc_aligned(1024, n * MI_PAGE_MAX_OVERALLOC_ALIGN); + ok = ((uintptr_t)p % (n*MI_PAGE_MAX_OVERALLOC_ALIGN)) == 0; mi_free(p); } result = ok; }; - CHECK_BODY("malloc-aligned9") { + CHECK_BODY("malloc-aligned9") { // test large alignments bool ok = true; void* p[8]; - size_t sizes[8] = { 8, 512, 1024 * 1024, MI_ALIGNMENT_MAX, MI_ALIGNMENT_MAX + 1, 2 * MI_ALIGNMENT_MAX, 8 * MI_ALIGNMENT_MAX, 0 }; - for (int i = 0; i < 28 && ok; i++) { + const int max_align_shift = + #if SIZE_MAX > UINT32_MAX + 28 + #else + 20 + #endif + ; + size_t sizes[8] = { 8, 512, 1024 * 1024, MI_PAGE_MAX_OVERALLOC_ALIGN, MI_PAGE_MAX_OVERALLOC_ALIGN + 1, 2 * MI_PAGE_MAX_OVERALLOC_ALIGN, 8 * MI_PAGE_MAX_OVERALLOC_ALIGN, 0 }; + for (int i = 0; i < max_align_shift && ok; i++) { int align = (1 << i); for (int j = 0; j < 8 && ok; j++) { p[j] = mi_zalloc_aligned(sizes[j], align); @@ -205,11 +244,38 @@ int main(void) { } result = ok; } - CHECK_BODY("malloc_aligned11") { - mi_heap_t* heap = mi_heap_new(); - void* p = mi_heap_malloc_aligned(heap, 33554426, 8); - result = mi_heap_contains_block(heap, p); - mi_heap_destroy(heap); + //CHECK_BODY("malloc_aligned11") { + // mi_theap_t* theap = mi_theap_new(); + // void* p = mi_theap_malloc_aligned(theap, 33554426, 8); + // result = mi_theap_contains_block(theap, p); + // mi_theap_destroy(theap); + //} + CHECK_BODY("mimalloc-aligned12") { + void* p = mi_malloc_aligned(0x100, 0x100); + result = (((uintptr_t)p % 0x100) == 0); // #602 + mi_free(p); + } + CHECK_BODY("mimalloc-aligned13") { + bool ok = true; + for( size_t size = 1; size <= (MI_SMALL_SIZE_MAX * 2) && ok; size++ ) { + for(size_t align = 1; align <= size && ok; align *= 2 ) { + void* p[10]; + for(int i = 0; i < 10 && ok; i++) { + p[i] = mi_malloc_aligned(size,align);; + ok = (p[i] != NULL && ((uintptr_t)(p[i]) % align) == 0); + } + for(int i = 0; i < 10 && ok; i++) { + mi_free(p[i]); + } + /* + if (ok && align <= size && ((size + MI_PADDING_SIZE) & (align-1)) == 0) { + size_t bsize = mi_good_size(size); + ok = (align <= bsize && (bsize & (align-1)) == 0); + } + */ + } + } + result = ok; } CHECK_BODY("malloc-aligned-at1") { void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p); @@ -226,6 +292,21 @@ int main(void) { } result = ok; }; + CHECK_BODY("zalloc-aligned-small1") { + size_t zalloc_size = MI_SMALL_SIZE_MAX / 2; + uint8_t* p = (uint8_t*)mi_zalloc_aligned(zalloc_size, MI_MAX_ALIGN_SIZE * 2); + result = mem_is_zero(p, zalloc_size); + mi_free(p); + }; + 
CHECK_BODY("rezalloc_aligned-small1") { + size_t zalloc_size = MI_SMALL_SIZE_MAX / 2; + uint8_t* p = (uint8_t*)mi_zalloc_aligned(zalloc_size, MI_MAX_ALIGN_SIZE * 2); + result = mem_is_zero(p, zalloc_size); + zalloc_size *= 3; + p = (uint8_t*)mi_rezalloc_aligned(p, zalloc_size, MI_MAX_ALIGN_SIZE * 2); + result = result && mem_is_zero(p, zalloc_size); + mi_free(p); + }; // --------------------------------------------------- // Reallocation @@ -255,26 +336,54 @@ int main(void) { mi_free(p); }; + // --------------------------------------------------- + // Returned block sizes + // --------------------------------------------------- + CHECK_BODY("umalloc1") { + for(size_t size = 1; size <= 32*MI_MiB; size *= 2 ) { + size_t bsize; + void* p = mi_umalloc(size,&bsize); + assert(bsize >= size); + size_t pre_size; + size_t post_size; + p = mi_urealloc(p, size + 1024, &pre_size, &post_size); + assert(pre_size == bsize); + assert(post_size >= size + 1024); + size_t fsize; + mi_ufree(p,&fsize); + assert(fsize == post_size); + } + } + // --------------------------------------------------- // Heaps // --------------------------------------------------- - CHECK("heap_destroy", test_heap1()); - CHECK("heap_delete", test_heap2()); + //CHECK("theap_destroy", test_theap1()); + //CHECK("theap_delete", test_theap2()); + //CHECK("theap_arena_destroy", test_theap_arena_destroy()); + //CHECK("theap_arena_delete", test_theap_arena_delete()); //mi_stats_print(NULL); // --------------------------------------------------- // various // --------------------------------------------------- + #if !defined(MI_TRACK_ASAN) // realpath may leak with ASAN enabled (as the ASAN allocator intercepts it) CHECK_BODY("realpath") { char* s = mi_realpath( ".", NULL ); // printf("realpath: %s\n",s); mi_free(s); }; + #endif CHECK("stl_allocator1", test_stl_allocator1()); CHECK("stl_allocator2", test_stl_allocator2()); + //CHECK("stl_theap_allocator1", test_stl_theap_allocator1()); + //CHECK("stl_theap_allocator2", test_stl_theap_allocator2()); + //CHECK("stl_theap_allocator3", test_stl_theap_allocator3()); + //CHECK("stl_theap_allocator4", test_stl_theap_allocator4()); + // --------------------------------------------------- // Done // ---------------------------------------------------[] @@ -285,27 +394,54 @@ int main(void) { // Larger test functions // --------------------------------------------------- -bool test_heap1() { - mi_heap_t* heap = mi_heap_new(); - int* p1 = mi_heap_malloc_tp(heap,int); - int* p2 = mi_heap_malloc_tp(heap,int); +/* +bool test_theap1(void) { + mi_theap_t* theap = mi_theap_new(); + int* p1 = mi_theap_malloc_tp(theap,int); + int* p2 = mi_theap_malloc_tp(theap,int); *p1 = *p2 = 43; - mi_heap_destroy(heap); + mi_theap_destroy(theap); return true; } -bool test_heap2() { - mi_heap_t* heap = mi_heap_new(); - int* p1 = mi_heap_malloc_tp(heap,int); - int* p2 = mi_heap_malloc_tp(heap,int); - mi_heap_delete(heap); +bool test_theap2(void) { + mi_theap_t* theap = mi_theap_new(); + int* p1 = mi_theap_malloc_tp(theap,int); + int* p2 = mi_theap_malloc_tp(theap,int); + mi_theap_delete(theap); *p1 = 42; mi_free(p1); mi_free(p2); return true; } -bool test_stl_allocator1() { +bool test_theap_arena_destroy(void) { + mi_arena_id_t arena_id = NULL; + if (mi_reserve_os_memory_ex(64 * 1024 * 1024, true, false, true, &arena_id) != 0) { + return false; + } + mi_theap_t* theap = mi_theap_new_ex(0, true, arena_id); + if (theap == NULL) { + return false; + } + mi_theap_destroy(theap); + return true; +} + +bool 
test_theap_arena_delete(void) { + mi_arena_id_t arena_id = NULL; + if (mi_reserve_os_memory_ex(64 * 1024 * 1024, true, false, true, &arena_id) != 0) { + return false; + } + mi_theap_t* theap = mi_theap_new_ex(0, true, arena_id); + if (theap == NULL) { + return false; + } + mi_theap_delete(theap); + return true; +} +*/ +bool test_stl_allocator1(void) { #ifdef __cplusplus std::vector > vec; vec.push_back(1); @@ -318,7 +454,7 @@ bool test_stl_allocator1() { struct some_struct { int i; int j; double z; }; -bool test_stl_allocator2() { +bool test_stl_allocator2(void) { #ifdef __cplusplus std::vector > vec; vec.push_back(some_struct()); @@ -328,3 +464,63 @@ bool test_stl_allocator2() { return true; #endif } + +/* +bool test_stl_theap_allocator1(void) { +#ifdef __cplusplus + std::vector > vec; + vec.push_back(some_struct()); + vec.pop_back(); + return vec.size() == 0; +#else + return true; +#endif +} + +bool test_stl_theap_allocator2(void) { +#ifdef __cplusplus + std::vector > vec; + vec.push_back(some_struct()); + vec.pop_back(); + return vec.size() == 0; +#else + return true; +#endif +} + +bool test_stl_theap_allocator3(void) { +#ifdef __cplusplus + mi_theap_t* theap = mi_theap_new(); + bool good = false; + { + mi_theap_stl_allocator myAlloc(theap); + std::vector > vec(myAlloc); + vec.push_back(some_struct()); + vec.pop_back(); + good = vec.size() == 0; + } + mi_theap_delete(theap); + return good; +#else + return true; +#endif +} + +bool test_stl_theap_allocator4(void) { +#ifdef __cplusplus + mi_theap_t* theap = mi_theap_new(); + bool good = false; + { + mi_theap_destroy_stl_allocator myAlloc(theap); + std::vector > vec(myAlloc); + vec.push_back(some_struct()); + vec.pop_back(); + good = vec.size() == 0; + } + mi_theap_destroy(theap); + return good; +#else + return true; +#endif +} +*/ diff --git a/3rd/mimalloc-2.0.9/test/test-stress.c b/3rd/mimalloc-3.2.6/test/test-stress.c similarity index 58% rename from 3rd/mimalloc-2.0.9/test/test-stress.c rename to 3rd/mimalloc-3.2.6/test/test-stress.c index 133ba50c..6cd3db83 100644 --- a/3rd/mimalloc-2.0.9/test/test-stress.c +++ b/3rd/mimalloc-3.2.6/test/test-stress.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2020 Microsoft Research, Daan Leijen +Copyright (c) 2018-2025 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. -----------------------------------------------------------------------------*/ @@ -7,7 +7,7 @@ terms of the MIT license. /* This is a stress test for the allocator, using multiple threads and transferring objects between threads. It tries to reflect real-world workloads: - allocation size is distributed linearly in powers of two - - with some fraction extra large (and some extra extra large) + - with some fraction extra large (and some very large) - the allocations are initialized and read again at free - pointers transfer between threads - threads are terminated and recreated with some objects surviving in between @@ -20,35 +20,91 @@ terms of the MIT license. 
#include #include #include +#include + +// #define MI_GUARDED 1 +// #define USE_STD_MALLOC 1 + +#ifndef USE_STD_MALLOC +#define MI_USE_HEAPS 4 +#endif // > mimalloc-test-stress [THREADS] [SCALE] [ITER] // // argument defaults +#if defined(MI_TSAN) // with thread-sanitizer reduce the threads to test within the azure pipeline limits +static int THREADS = 8; +static int SCALE = 25; +static int ITER = 400; +#elif defined(MI_UBSAN) // with undefined behavious sanitizer reduce parameters to stay within the azure pipeline limits +static int THREADS = 8; +static int SCALE = 25; +static int ITER = 20; +#elif defined(MI_GUARDED) // with debug guard pages reduce parameters to stay within the azure pipeline limits +static int THREADS = 8; +static int SCALE = 10; +static int ITER = 10; +#elif 0 +static int THREADS = 4; +static int SCALE = 10; +static int ITER = 20; +#elif 0 +static int THREADS = 32; +static int SCALE = 50; +static int ITER = 50; +#elif 0 +static int THREADS = 32; +static int SCALE = 25; +static int ITER = 50; +#define ALLOW_LARGE true +#else static int THREADS = 32; // more repeatable if THREADS <= #processors -static int SCALE = 25; // scaling factor +static int SCALE = 50; // scaling factor static int ITER = 50; // N full iterations destructing and re-creating all threads +#endif + -// static int THREADS = 8; // more repeatable if THREADS <= #processors -// static int SCALE = 100; // scaling factor -#define STRESS // undefine for leak test +#define STRESS // undefine for leak test + +#ifndef ALLOW_LARGE +#define ALLOW_LARGE false +#endif -static bool allow_large_objects = true; // allow very large objects? -static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? +static bool allow_large_objects = ALLOW_LARGE; // allow very large objects? (set to `true` if SCALE>100) +static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? + +static bool main_participates = false; // main thread participates as a worker too -// #define USE_STD_MALLOC #ifdef USE_STD_MALLOC -#define custom_calloc(n,s) malloc(n*s) + +#define custom_calloc(n,s) calloc(n,s) #define custom_realloc(p,s) realloc(p,s) #define custom_free(p) free(p) + #else + #include -#define custom_calloc(n,s) mi_malloc(n*s) +#include + +#ifdef MI_USE_HEAPS +static mi_heap_t* current_heap; +#define custom_calloc(n,s) mi_heap_calloc(current_heap,n,s) +#define custom_realloc(p,s) mi_heap_realloc(current_heap,p,s) +#define custom_free(p) mi_free(p) +#else +#define custom_calloc(n,s) mi_calloc(n,s) #define custom_realloc(p,s) mi_realloc(p,s) #define custom_free(p) mi_free(p) #endif +#ifndef NDEBUG +#define xMI_HEAP_WALK // walk the theap objects? 
+#endif + +#endif + // transfer pointer between threads #define TRANSFERS (1000) static volatile void* transfer[TRANSFERS]; @@ -91,16 +147,17 @@ static bool chance(size_t perc, random_t r) { static void* alloc_items(size_t items, random_t r) { if (chance(1, r)) { - if (chance(1, r) && allow_large_objects) items *= 50000; // 0.01% giant + if (chance(1, r) && allow_large_objects) items *= 10000; // 0.01% giant else if (chance(10, r) && allow_large_objects) items *= 1000; // 0.1% huge else items *= 100; // 1% large objects; } - if (items == 40) items++; // pthreads uses that size for stack increases + if (items>=32 && items<=40) items*=2; // pthreads uses 320b allocations (this shows that more clearly in the stats) if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t)); - if (items==0) items = 1; + if (items==0) items = 1; uintptr_t* p = (uintptr_t*)custom_calloc(items,sizeof(uintptr_t)); if (p != NULL) { for (uintptr_t i = 0; i < items; i++) { + assert(p[i] == 0); p[i] = (items - i) ^ cookie; } } @@ -121,6 +178,16 @@ static void free_items(void* p) { custom_free(p); } +#ifdef MI_HEAP_WALK +static bool visit_blocks(const mi_theap_t* theap, const mi_theap_area_t* area, void* block, size_t block_size, void* arg) { + (void)(theap); (void)(area); + size_t* total = (size_t*)arg; + if (block != NULL) { + *total += block_size; + } + return true; +} +#endif static void stress(intptr_t tid) { //bench_start_thread(); @@ -165,6 +232,13 @@ static void stress(intptr_t tid) { data[data_idx] = q; } } + + #ifdef MI_HEAP_WALK + // walk the theap + size_t total = 0; + mi_theap_visit_blocks(mi_theap_get_default(), true, visit_blocks, &total); + #endif + // free everything that is left for (size_t i = 0; i < retain_top; i++) { free_items(retained[i]); @@ -180,23 +254,73 @@ static void stress(intptr_t tid) { static void run_os_threads(size_t nthreads, void (*entry)(intptr_t tid)); static void test_stress(void) { + #ifdef MI_USE_HEAPS + mi_heap_t* prev_heaps[MI_USE_HEAPS] = { NULL }; + #endif uintptr_t r = rand(); for (int n = 0; n < ITER; n++) { - run_os_threads(THREADS, &stress); + + #ifdef MI_USE_HEAPS + // new heap for each iteration + if (prev_heaps[MI_USE_HEAPS-1] != NULL) { + mi_heap_delete(prev_heaps[MI_USE_HEAPS-1]); // delete from N iterations ago + } + for(int i = MI_USE_HEAPS-1; i > 0; i--) { + prev_heaps[i] = prev_heaps[i-1]; + } + prev_heaps[0] = current_heap; + current_heap = mi_heap_new(); + #endif + + run_os_threads(THREADS, &stress); + + #if !defined(NDEBUG) && !defined(USE_STD_MALLOC) + // switch between arena and OS allocation for testing + // mi_option_set_enabled(mi_option_disallow_arena_alloc, (n%2)==1); + #endif + #if defined(MI_HEAP_WALK) && defined(MI_USE_HEAPS) + size_t total = 0; + // mi_abandoned_visit_blocks(mi_subproc_main(), -1, true, visit_blocks, &total); + mi_heap_visit_blocks(heap, true, visit_blocks, &total); + #endif + for (int i = 0; i < TRANSFERS; i++) { if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers void* p = atomic_exchange_ptr(&transfer[i], NULL); free_items(p); } } - #ifndef NDEBUG - //mi_collect(false); - //mi_debug_show_arenas(); - #endif + #if !defined(NDEBUG) || defined(MI_TSAN) - if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } + if ((n + 1) % 10 == 0) { + printf("- iterations left: %3d\n", ITER - (n + 1)); + #ifndef USE_STD_MALLOC + mi_debug_show_arenas(); + #endif + //mi_collect(true); + //mi_debug_show_arenas(); + } #endif } + + #ifndef USE_STD_MALLOC + 
mi_stats_print(NULL); + #endif + + // clean up (a bit too early to test the final free_items still works correctly) + #ifdef MI_USE_HEAPS + for (int i = 0; i < MI_USE_HEAPS; i++) { + mi_heap_delete(prev_heaps[i]); prev_heaps[i] = NULL; + } + mi_heap_delete(current_heap); current_heap = NULL; + #endif + + for (int i = 0; i < TRANSFERS; i++) { + void* p = atomic_exchange_ptr(&transfer[i], NULL); + if (p != NULL) { + free_items(p); + } + } } #ifndef STRESS @@ -221,7 +345,29 @@ static void test_leak(void) { } #endif +#if defined(USE_STD_MALLOC) && defined(MI_LINK_VERSION) +#ifdef __cplusplus +extern "C" +#endif +int mi_version(void); +#endif + int main(int argc, char** argv) { + #ifdef MI_LINK_VERSION + mi_version(); + #endif + #ifdef MI_HEAP_WALK + mi_option_enable(mi_option_visit_abandoned); + #endif + #if !defined(NDEBUG) && !defined(USE_STD_MALLOC) + mi_option_set(mi_option_arena_reserve, 32 * 1024 /* in kib = 32MiB */); + mi_option_set(mi_option_purge_delay,1); + #endif + #if defined(NDEBUG) && !defined(USE_STD_MALLOC) + // mi_option_set(mi_option_purge_delay,-1); + mi_option_set(mi_option_page_reclaim_on_free, 0); + #endif + // > mimalloc-test-stress [THREADS] [SCALE] [ITER] if (argc >= 2) { char* end; @@ -238,7 +384,19 @@ int main(int argc, char** argv) { long n = (strtol(argv[3], &end, 10)); if (n > 0) ITER = n; } - printf("Using %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER); + if (SCALE > 100) { + allow_large_objects = true; + } + printf("Using %d threads with a %d%% load-per-thread and %d iterations%s", THREADS, SCALE, ITER, (allow_large_objects ? " (allow large objects)" : "")); + #if MI_USE_HEAPS + printf(" (using %d rolling heaps)", MI_USE_HEAPS); + #endif + printf("\n"); + + #if !defined(NDEBUG) && !defined(USE_STD_MALLOC) + mi_stats_reset(); + #endif + //mi_reserve_os_memory(1024*1024*1024ULL, false, true); //int res = mi_reserve_huge_os_pages(4,1); //printf("(reserve huge: %i\n)", res); @@ -247,24 +405,25 @@ int main(int argc, char** argv) { // Run ITER full iterations where half the objects in the transfer buffer survive to the next round. srand(0x7feb352d); - - //mi_reserve_os_memory(512ULL << 20, true, true); - -#if !defined(NDEBUG) && !defined(USE_STD_MALLOC) - mi_stats_reset(); -#endif - + // mi_stats_reset(); #ifdef STRESS - test_stress(); + test_stress(); #else - test_leak(); + test_leak(); #endif #ifndef USE_STD_MALLOC #ifndef NDEBUG mi_collect(true); - //mi_debug_show_arenas(); + mi_debug_show_arenas(); + //mi_collect(true); + //char* json = mi_stats_get_json(0, NULL); + //if (json != NULL) { + // fputs(json,stderr); + // mi_free(json); + //} #endif + mi_collect(true); mi_stats_print(NULL); #endif //bench_end_program(); @@ -276,7 +435,7 @@ static void (*thread_entry_fun)(intptr_t) = &stress; #ifdef _WIN32 -#include +#include static DWORD WINAPI thread_entry(LPVOID param) { thread_entry_fun((intptr_t)param); @@ -287,13 +446,16 @@ static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) { thread_entry_fun = fun; DWORD* tids = (DWORD*)custom_calloc(nthreads,sizeof(DWORD)); HANDLE* thandles = (HANDLE*)custom_calloc(nthreads,sizeof(HANDLE)); - for (uintptr_t i = 0; i < nthreads; i++) { - thandles[i] = CreateThread(0, 8*1024, &thread_entry, (void*)(i), 0, &tids[i]); + thandles[0] = GetCurrentThread(); // avoid lint warning + const size_t start = (main_participates ? 
1 : 0); + for (size_t i = start; i < nthreads; i++) { + thandles[i] = CreateThread(0, 8*1024L, &thread_entry, (void*)(i), 0, &tids[i]); } - for (size_t i = 0; i < nthreads; i++) { + if (main_participates) fun(0); // run the main thread as well + for (size_t i = start; i < nthreads; i++) { WaitForSingleObject(thandles[i], INFINITE); } - for (size_t i = 0; i < nthreads; i++) { + for (size_t i = start; i < nthreads; i++) { CloseHandle(thandles[i]); } custom_free(tids); @@ -320,11 +482,13 @@ static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) { thread_entry_fun = fun; pthread_t* threads = (pthread_t*)custom_calloc(nthreads,sizeof(pthread_t)); memset(threads, 0, sizeof(pthread_t) * nthreads); + const size_t start = (main_participates ? 1 : 0); //pthread_setconcurrency(nthreads); - for (size_t i = 0; i < nthreads; i++) { + for (size_t i = start; i < nthreads; i++) { pthread_create(&threads[i], NULL, &thread_entry, (void*)i); } - for (size_t i = 0; i < nthreads; i++) { + if (main_participates) fun(0); // run the main thread as well + for (size_t i = start; i < nthreads; i++) { pthread_join(threads[i], NULL); } custom_free(threads); diff --git a/3rd/mimalloc-2.0.9/test/test-wrong.c b/3rd/mimalloc-3.2.6/test/test-wrong.c similarity index 74% rename from 3rd/mimalloc-2.0.9/test/test-wrong.c rename to 3rd/mimalloc-3.2.6/test/test-wrong.c index 17d253b6..56a2339a 100644 --- a/3rd/mimalloc-2.0.9/test/test-wrong.c +++ b/3rd/mimalloc-3.2.6/test/test-wrong.c @@ -5,11 +5,14 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ -/* test file for valgrind support. +/* test file for valgrind/asan support. + + VALGRIND: + ---------- Compile in an "out/debug" folder: > cd out/debug - > cmake ../.. -DMI_VALGRIND=1 + > cmake ../.. -DMI_TRACK_VALGRIND=1 > make -j8 and then compile this file as: @@ -19,6 +22,25 @@ terms of the MIT license. A copy of the license can be found in the file and test as: > valgrind ./test-wrong + + + ASAN + ---------- + Compile in an "out/debug" folder: + + > cd out/debug + > cmake ../.. -DMI_TRACK_ASAN=1 + > make -j8 + + and then compile this file as: + + > clang -g -o test-wrong -I../../include ../../test/test-wrong.c libmimalloc-asan-debug.a -lpthread -fsanitize=address -fsanitize-recover=address + + and test as: + + > ASAN_OPTIONS=verbosity=1:halt_on_error=0 ./test-wrong + + */ #include #include diff --git a/3rd/mimalloc-2.0.9/test/testhelper.h b/3rd/mimalloc-3.2.6/test/testhelper.h similarity index 100% rename from 3rd/mimalloc-2.0.9/test/testhelper.h rename to 3rd/mimalloc-3.2.6/test/testhelper.h diff --git a/CodeFormatServer/CMakeLists.txt b/CodeFormatServer/CMakeLists.txt index b6b2cbc9..161876f1 100644 --- a/CodeFormatServer/CMakeLists.txt +++ b/CodeFormatServer/CMakeLists.txt @@ -10,7 +10,7 @@ target_include_directories(CodeFormatServer PRIVATE include ${LuaCodeStyle_SOURCE_DIR}/3rd/asio-1.24.0/include ${LuaCodeStyle_SOURCE_DIR}/3rd/nlohmann_json/include - ${LuaCodeStyle_SOURCE_DIR}/3rd/mimalloc-2.0.9/include + ${LuaCodeStyle_SOURCE_DIR}/3rd/mimalloc-3.2.6/include src ) @@ -57,7 +57,7 @@ target_sources(CodeFormatServer src/Service/ConfigService.cpp # mimalloc - ${LuaCodeStyle_SOURCE_DIR}/3rd/mimalloc-2.0.9/src/static.c + ${LuaCodeStyle_SOURCE_DIR}/3rd/mimalloc-3.2.6/src/static.c ) target_link_libraries(CodeFormatServer CodeFormatCore)