From 1117f55761b1be66ee76ab831afea2f18f7d2367 Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Mon, 26 Jan 2026 12:00:26 -0500 Subject: [PATCH 1/9] Enhance Profiler with thread-local data management and synchronization logic; add unit tests for profiling functionality --- src/ipc/utils/profiler.cpp | 177 ++++++++++++++++++++++-- src/ipc/utils/profiler.hpp | 42 ++++-- tests/src/tests/utils/CMakeLists.txt | 3 +- tests/src/tests/utils/test_profiler.cpp | 88 ++++++++++++ 4 files changed, 286 insertions(+), 24 deletions(-) create mode 100644 tests/src/tests/utils/test_profiler.cpp diff --git a/src/ipc/utils/profiler.cpp b/src/ipc/utils/profiler.cpp index 12bbfaeb6..62cb3ef91 100644 --- a/src/ipc/utils/profiler.cpp +++ b/src/ipc/utils/profiler.cpp @@ -2,12 +2,13 @@ #ifdef IPC_TOOLKIT_WITH_PROFILER +#include #include #include namespace ipc { -Profiler::Profiler() { } +Profiler::Profiler() { m_main_thread_id = std::this_thread::get_id(); } Profiler& profiler() { @@ -15,14 +16,108 @@ Profiler& profiler() return instance; } -void Profiler::clear() { m_data.clear(); } +void Profiler::clear() { m_thread_data.clear(); } void Profiler::start(const std::string& name) { - current_scope.push_back(name); + auto& [m_data, current_scope, scope_path, sync_version] = + m_thread_data.local(); + + if (std::this_thread::get_id() == m_main_thread_id) { + // --- Main Thread Logic --- + if (!m_main_thread_data) { + m_main_thread_data = &m_thread_data.local(); + } + + // SeqLock: Increment to Odd (Write in progress) + size_t v = m_scope_version.load(std::memory_order_relaxed); + m_scope_version.store(v + 1, std::memory_order_release); + + scope_path.push_back(name); + current_scope.push_back(name); + + // SeqLock: Increment to Even (Write finished) + m_scope_version.store(v + 2, std::memory_order_release); + } else { + // --- Worker Thread Logic --- + + size_t global_v = m_scope_version.load(std::memory_order_acquire); + + // If versions mismatch, we must re-sync our scope with the main thread + if (sync_version != global_v) { + + std::vector snapshot_path; + size_t v_snapshot = 0; + + // SeqLock Read Loop + while (true) { + size_t v1 = m_scope_version.load(std::memory_order_acquire); + if (v1 % 2 != 0) { // Writing? + std::this_thread::yield(); + continue; + } + + if (m_main_thread_data) { + snapshot_path = m_main_thread_data->scope_path; + } else { + snapshot_path.clear(); + } + + std::atomic_thread_fence(std::memory_order_acquire); + size_t v2 = m_scope_version.load(std::memory_order_acquire); + + if (v1 == v2) { + v_snapshot = v1; + break; + } + } + + // --- Deduplication / Pruning Strategy --- + // If the main thread is already deep inside the parallel loop, its + // stack might look like ["Block 3", "Block 4", "Block 5"]. If we + // are starting "Block 4", we want to inherit ["Block 3"]. Strategy: + // Search for 'name' in the snapshot from back to front. If found, + // prune everything after (and including) it. + + auto it = + std::find(snapshot_path.rbegin(), snapshot_path.rend(), name); + if (it != snapshot_path.rend()) { + // Found 'name' in the main stack. + // The 'base' scope is everything before this occurrence. + // rbegin() is the last element. rend() is before first. + // distance from rbegin is how many elements to pop + 1. + // Or simply: resize to the index of the element. + + // Convert reverse iterator to index + // forward iterator: it.base() returns iterator to element AFTER + // the one found so we want to keep everything up to (it.base() + // - 1) - 1 ?? Simpler: The index of the element found is: + size_t found_idx = + std::distance(begin(snapshot_path), it.base()) - 1; + + // We want to keep [0 ... found_idx - 1] + snapshot_path.resize(found_idx); + } + + // Apply snapshot to local state + scope_path = snapshot_path; + current_scope = nlohmann::json::json_pointer(); + for (const auto& s : scope_path) { + current_scope.push_back(s); + } + + sync_version = v_snapshot; + } + + // Finally push the new block for this thread + scope_path.push_back(name); + current_scope.push_back(name); + } + + // Initialize data container if needed if (!m_data.contains(current_scope)) { m_data[current_scope] = { - { "time_ms", 0 }, + { "time_ms", 0.0 }, { "count", 0 }, }; } @@ -34,6 +129,10 @@ void Profiler::stop(const double time_ms) "[{}] {{}} {{:.6f}} ms", fmt::format(fmt::fg(fmt::terminal_color::magenta), "timing")); + auto& [m_data, current_scope, scope_path, sync_version] = + m_thread_data.local(); + + // Optional: Logging every stop might be spammy in parallel loops logger().trace( fmt::runtime(log_fmt_text), current_scope.to_string(), time_ms); @@ -44,22 +143,71 @@ void Profiler::stop(const double time_ms) m_data[current_scope]["time_ms"].get() + time_ms; m_data[current_scope]["count"] = m_data[current_scope]["count"].get() + 1; - current_scope.pop_back(); + + if (std::this_thread::get_id() == m_main_thread_id) { + size_t v = m_scope_version.load(std::memory_order_relaxed); + m_scope_version.store(v + 1, std::memory_order_release); + + scope_path.pop_back(); + current_scope.pop_back(); + + m_scope_version.store(v + 2, std::memory_order_release); + } else { + scope_path.pop_back(); + current_scope.pop_back(); + } } void Profiler::reset() { - m_data.clear(); - // reset the calling thread's scope - current_scope = nlohmann::json::json_pointer(); // root + m_thread_data.clear(); + m_scope_version = 0; + m_main_thread_data = nullptr; } +void Profiler::merge_json(nlohmann::json& target, const nlohmann::json& source) +{ + for (auto it = source.begin(); it != source.end(); ++it) { + if (target.contains(it.key())) { + if (it.value().is_number()) { + // Sum numeric values (time_ms, count) + if (target[it.key()].is_number()) { + if (it.value().is_number_integer()) { + target[it.key()] = + target[it.key()].get() + it.value().get(); + } else if (it.value().is_number_float()) { + target[it.key()] = target[it.key()].get() + + it.value().get(); + } + } + } else if (it.value().is_object()) { + // Recursively merge objects (scopes) + merge_json(target[it.key()], it.value()); + } + } else { + // New key, just copy + target[it.key()] = it.value(); + } + } +} + +nlohmann::json Profiler::combine_data() const +{ + nlohmann::json combined = nlohmann::json::object(); + for (const auto& tld : m_thread_data) { + merge_json(combined, tld.m_data); + } + return combined; +} + +nlohmann::json Profiler::data() const { return combine_data(); } + void Profiler::print() const { logger().info( "[{}] profiler: {}", fmt::format(fmt::fg(fmt::terminal_color::magenta), "timing"), - m_data.dump(2)); + combine_data().dump(2)); } void Profiler::write_csv(const std::string& filename) const @@ -75,16 +223,16 @@ void Profiler::write_csv(const std::string& filename) const void Profiler::write_csv(std::ostream& os) const { + nlohmann::json combined = combine_data(); + os << "Id,Parent,Name,Time (ms),Count\n"; - if (m_data.empty()) { + if (combined.empty()) { os << std::flush; return; } - // Print the profiler data in CSV format using a breadth-first traversal const nlohmann::json::json_pointer root; - // parent id, pointer std::queue> queue; queue.push(std::make_pair(-1, root)); int id = -1; @@ -93,8 +241,8 @@ void Profiler::write_csv(std::ostream& os) const const auto [parent_id, ptr] = queue.front(); queue.pop(); - assert(m_data.contains(ptr)); - const auto& data = ptr == root ? m_data : m_data.at(ptr); + assert(combined.contains(ptr)); + const auto& data = ptr == root ? combined : combined.at(ptr); if (ptr != root) { os << fmt::format( "{:d},{},{},{:.6g},{:d}\n", id, @@ -103,7 +251,6 @@ void Profiler::write_csv(std::ostream& os) const data.at("count").get()); } - // Traverse child scopes for (const auto& [key, val] : data.items()) { if (val.is_object()) { queue.push(std::make_pair(id, ptr / key)); diff --git a/src/ipc/utils/profiler.hpp b/src/ipc/utils/profiler.hpp index 2f560247e..13d99163e 100644 --- a/src/ipc/utils/profiler.hpp +++ b/src/ipc/utils/profiler.hpp @@ -10,9 +10,13 @@ // clang-format on #include +#include +#include #include #include +#include +#include // Helper macro to stringify/paste after expansion #define IPC_TOOLKIT_PROFILE_BLOCK_CONCAT_IMPL(a, b) a##b @@ -58,17 +62,39 @@ class Profiler { void print_csv() const { write_csv(std::cout); } /// @brief Access the profiling data as a JSON object. - const nlohmann::json& data() const { return m_data; } - - /// @brief Access the profiling data as a JSON object. - nlohmann::json& data() { return m_data; } + nlohmann::json data() const; protected: - /// @brief The profiling data stored as a JSON object. - nlohmann::json m_data; + struct ThreadLocalData { + /// @brief The profiling data stored as a JSON object. + nlohmann::json m_data; + + /// @brief The global scope pointer into the JSON data. + nlohmann::json::json_pointer current_scope; + + /// @brief A helper vector to track the path strings for syncing logic. + std::vector scope_path; + + /// @brief The version of the global scope this thread is synced with. + size_t sync_version = 0; + }; + + /// @brief Thread-local storage for profiling data. + mutable tbb::enumerable_thread_specific m_thread_data; + + /// @brief Helper to combine all thread-local data into one JSON object. + nlohmann::json combine_data() const; + + /// @brief Helper to recursively merge two JSON objects. + static void + merge_json(nlohmann::json& target, const nlohmann::json& source); + + // --- Synchronization --- + std::thread::id m_main_thread_id; + ThreadLocalData* m_main_thread_data = nullptr; - /// @brief The global scope pointer into the JSON data. - nlohmann::json::json_pointer current_scope; + // Sequence lock version: Even = Stable, Odd = Modifying + std::atomic m_scope_version { 0 }; }; Profiler& profiler(); diff --git a/tests/src/tests/utils/CMakeLists.txt b/tests/src/tests/utils/CMakeLists.txt index 2a547cbd7..50f0dff01 100644 --- a/tests/src/tests/utils/CMakeLists.txt +++ b/tests/src/tests/utils/CMakeLists.txt @@ -1,7 +1,8 @@ set(SOURCES # Tests - test_utils.cpp test_matrixcache.cpp + test_profiler.cpp + test_utils.cpp # Benchmarks diff --git a/tests/src/tests/utils/test_profiler.cpp b/tests/src/tests/utils/test_profiler.cpp new file mode 100644 index 000000000..1d9c72e0f --- /dev/null +++ b/tests/src/tests/utils/test_profiler.cpp @@ -0,0 +1,88 @@ +#include + +#ifdef IPC_TOOLKIT_WITH_PROFILER + +#include +#include +#include + +using namespace ipc; + +TEST_CASE("Profiler", "[profiler]") +{ + constexpr int sleep_time_ms = 100; + constexpr int num_threads = 10; + + { + IPC_TOOLKIT_PROFILE_BLOCK("Block 1"); + std::this_thread::sleep_for(std::chrono::milliseconds(sleep_time_ms)); + } + + nlohmann::json data = profiler().data(); + + CHECK(data.size() == 1); + + REQUIRE(data.contains("Block 1")); + nlohmann::json block1 = data.at("Block 1"); + + CHECK(block1.size() == 2); // count, time_ms + CHECK( + block1["time_ms"].get() + == Catch::Approx(sleep_time_ms).margin(10)); + CHECK(block1["count"].get() == 1); + + // --------------------------------------------------------------------- + + tbb::parallel_for(0, num_threads, [&](int) { + // for (int i = 0; i < num_threads; ++i) { + IPC_TOOLKIT_PROFILE_BLOCK("Block 2"); + std::this_thread::sleep_for( + std::chrono::milliseconds(sleep_time_ms / num_threads)); + // } + }); + + data = profiler().data(); + profiler().print(); + + CHECK(data.size() == 2); + + REQUIRE(data.contains("Block 2")); + nlohmann::json block2 = data.at("Block 2"); + CHECK(block2.size() == 2); // count, time_ms + CHECK(block2["count"].get() == num_threads); + + // --------------------------------------------------------------------- + + { + IPC_TOOLKIT_PROFILE_BLOCK("Block 3"); + + tbb::parallel_for(0, num_threads, [&](int) { + // for (int i = 0; i < num_threads; ++i) { + IPC_TOOLKIT_PROFILE_BLOCK("Block 4"); + { + IPC_TOOLKIT_PROFILE_BLOCK("Block 5"); + std::this_thread::sleep_for( + std::chrono::milliseconds(sleep_time_ms / num_threads)); + } + // } + }); + } + + data = profiler().data(); + profiler().print(); + + CHECK(data.size() == 3); + + REQUIRE(data.contains("Block 3")); + CAPTURE(!data.contains("Block 4")); + nlohmann::json block3 = data.at("Block 3"); + CHECK(block3.size() == 3); // count, time_ms, Block 4 + CHECK(block3["count"].get() == 1); + + REQUIRE(block3.contains("Block 4")); + nlohmann::json block4 = block3.at("Block 4"); + CHECK(block4.size() == 3); // count, time_ms + CHECK(block4["count"].get() == num_threads); +} + +#endif \ No newline at end of file From 0f864416bcc7d4ff306855d32e261f1f0211cd8b Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Tue, 28 Apr 2026 10:53:34 -0400 Subject: [PATCH 2/9] Revert Profiler changes to main --- src/ipc/utils/profiler.cpp | 177 ++++--------------------------------- src/ipc/utils/profiler.hpp | 42 ++------- 2 files changed, 23 insertions(+), 196 deletions(-) diff --git a/src/ipc/utils/profiler.cpp b/src/ipc/utils/profiler.cpp index 62cb3ef91..12bbfaeb6 100644 --- a/src/ipc/utils/profiler.cpp +++ b/src/ipc/utils/profiler.cpp @@ -2,13 +2,12 @@ #ifdef IPC_TOOLKIT_WITH_PROFILER -#include #include #include namespace ipc { -Profiler::Profiler() { m_main_thread_id = std::this_thread::get_id(); } +Profiler::Profiler() { } Profiler& profiler() { @@ -16,108 +15,14 @@ Profiler& profiler() return instance; } -void Profiler::clear() { m_thread_data.clear(); } +void Profiler::clear() { m_data.clear(); } void Profiler::start(const std::string& name) { - auto& [m_data, current_scope, scope_path, sync_version] = - m_thread_data.local(); - - if (std::this_thread::get_id() == m_main_thread_id) { - // --- Main Thread Logic --- - if (!m_main_thread_data) { - m_main_thread_data = &m_thread_data.local(); - } - - // SeqLock: Increment to Odd (Write in progress) - size_t v = m_scope_version.load(std::memory_order_relaxed); - m_scope_version.store(v + 1, std::memory_order_release); - - scope_path.push_back(name); - current_scope.push_back(name); - - // SeqLock: Increment to Even (Write finished) - m_scope_version.store(v + 2, std::memory_order_release); - } else { - // --- Worker Thread Logic --- - - size_t global_v = m_scope_version.load(std::memory_order_acquire); - - // If versions mismatch, we must re-sync our scope with the main thread - if (sync_version != global_v) { - - std::vector snapshot_path; - size_t v_snapshot = 0; - - // SeqLock Read Loop - while (true) { - size_t v1 = m_scope_version.load(std::memory_order_acquire); - if (v1 % 2 != 0) { // Writing? - std::this_thread::yield(); - continue; - } - - if (m_main_thread_data) { - snapshot_path = m_main_thread_data->scope_path; - } else { - snapshot_path.clear(); - } - - std::atomic_thread_fence(std::memory_order_acquire); - size_t v2 = m_scope_version.load(std::memory_order_acquire); - - if (v1 == v2) { - v_snapshot = v1; - break; - } - } - - // --- Deduplication / Pruning Strategy --- - // If the main thread is already deep inside the parallel loop, its - // stack might look like ["Block 3", "Block 4", "Block 5"]. If we - // are starting "Block 4", we want to inherit ["Block 3"]. Strategy: - // Search for 'name' in the snapshot from back to front. If found, - // prune everything after (and including) it. - - auto it = - std::find(snapshot_path.rbegin(), snapshot_path.rend(), name); - if (it != snapshot_path.rend()) { - // Found 'name' in the main stack. - // The 'base' scope is everything before this occurrence. - // rbegin() is the last element. rend() is before first. - // distance from rbegin is how many elements to pop + 1. - // Or simply: resize to the index of the element. - - // Convert reverse iterator to index - // forward iterator: it.base() returns iterator to element AFTER - // the one found so we want to keep everything up to (it.base() - // - 1) - 1 ?? Simpler: The index of the element found is: - size_t found_idx = - std::distance(begin(snapshot_path), it.base()) - 1; - - // We want to keep [0 ... found_idx - 1] - snapshot_path.resize(found_idx); - } - - // Apply snapshot to local state - scope_path = snapshot_path; - current_scope = nlohmann::json::json_pointer(); - for (const auto& s : scope_path) { - current_scope.push_back(s); - } - - sync_version = v_snapshot; - } - - // Finally push the new block for this thread - scope_path.push_back(name); - current_scope.push_back(name); - } - - // Initialize data container if needed + current_scope.push_back(name); if (!m_data.contains(current_scope)) { m_data[current_scope] = { - { "time_ms", 0.0 }, + { "time_ms", 0 }, { "count", 0 }, }; } @@ -129,10 +34,6 @@ void Profiler::stop(const double time_ms) "[{}] {{}} {{:.6f}} ms", fmt::format(fmt::fg(fmt::terminal_color::magenta), "timing")); - auto& [m_data, current_scope, scope_path, sync_version] = - m_thread_data.local(); - - // Optional: Logging every stop might be spammy in parallel loops logger().trace( fmt::runtime(log_fmt_text), current_scope.to_string(), time_ms); @@ -143,71 +44,22 @@ void Profiler::stop(const double time_ms) m_data[current_scope]["time_ms"].get() + time_ms; m_data[current_scope]["count"] = m_data[current_scope]["count"].get() + 1; - - if (std::this_thread::get_id() == m_main_thread_id) { - size_t v = m_scope_version.load(std::memory_order_relaxed); - m_scope_version.store(v + 1, std::memory_order_release); - - scope_path.pop_back(); - current_scope.pop_back(); - - m_scope_version.store(v + 2, std::memory_order_release); - } else { - scope_path.pop_back(); - current_scope.pop_back(); - } + current_scope.pop_back(); } void Profiler::reset() { - m_thread_data.clear(); - m_scope_version = 0; - m_main_thread_data = nullptr; + m_data.clear(); + // reset the calling thread's scope + current_scope = nlohmann::json::json_pointer(); // root } -void Profiler::merge_json(nlohmann::json& target, const nlohmann::json& source) -{ - for (auto it = source.begin(); it != source.end(); ++it) { - if (target.contains(it.key())) { - if (it.value().is_number()) { - // Sum numeric values (time_ms, count) - if (target[it.key()].is_number()) { - if (it.value().is_number_integer()) { - target[it.key()] = - target[it.key()].get() + it.value().get(); - } else if (it.value().is_number_float()) { - target[it.key()] = target[it.key()].get() - + it.value().get(); - } - } - } else if (it.value().is_object()) { - // Recursively merge objects (scopes) - merge_json(target[it.key()], it.value()); - } - } else { - // New key, just copy - target[it.key()] = it.value(); - } - } -} - -nlohmann::json Profiler::combine_data() const -{ - nlohmann::json combined = nlohmann::json::object(); - for (const auto& tld : m_thread_data) { - merge_json(combined, tld.m_data); - } - return combined; -} - -nlohmann::json Profiler::data() const { return combine_data(); } - void Profiler::print() const { logger().info( "[{}] profiler: {}", fmt::format(fmt::fg(fmt::terminal_color::magenta), "timing"), - combine_data().dump(2)); + m_data.dump(2)); } void Profiler::write_csv(const std::string& filename) const @@ -223,16 +75,16 @@ void Profiler::write_csv(const std::string& filename) const void Profiler::write_csv(std::ostream& os) const { - nlohmann::json combined = combine_data(); - os << "Id,Parent,Name,Time (ms),Count\n"; - if (combined.empty()) { + if (m_data.empty()) { os << std::flush; return; } + // Print the profiler data in CSV format using a breadth-first traversal const nlohmann::json::json_pointer root; + // parent id, pointer std::queue> queue; queue.push(std::make_pair(-1, root)); int id = -1; @@ -241,8 +93,8 @@ void Profiler::write_csv(std::ostream& os) const const auto [parent_id, ptr] = queue.front(); queue.pop(); - assert(combined.contains(ptr)); - const auto& data = ptr == root ? combined : combined.at(ptr); + assert(m_data.contains(ptr)); + const auto& data = ptr == root ? m_data : m_data.at(ptr); if (ptr != root) { os << fmt::format( "{:d},{},{},{:.6g},{:d}\n", id, @@ -251,6 +103,7 @@ void Profiler::write_csv(std::ostream& os) const data.at("count").get()); } + // Traverse child scopes for (const auto& [key, val] : data.items()) { if (val.is_object()) { queue.push(std::make_pair(id, ptr / key)); diff --git a/src/ipc/utils/profiler.hpp b/src/ipc/utils/profiler.hpp index 13d99163e..2f560247e 100644 --- a/src/ipc/utils/profiler.hpp +++ b/src/ipc/utils/profiler.hpp @@ -10,13 +10,9 @@ // clang-format on #include -#include -#include #include #include -#include -#include // Helper macro to stringify/paste after expansion #define IPC_TOOLKIT_PROFILE_BLOCK_CONCAT_IMPL(a, b) a##b @@ -62,39 +58,17 @@ class Profiler { void print_csv() const { write_csv(std::cout); } /// @brief Access the profiling data as a JSON object. - nlohmann::json data() const; + const nlohmann::json& data() const { return m_data; } -protected: - struct ThreadLocalData { - /// @brief The profiling data stored as a JSON object. - nlohmann::json m_data; - - /// @brief The global scope pointer into the JSON data. - nlohmann::json::json_pointer current_scope; - - /// @brief A helper vector to track the path strings for syncing logic. - std::vector scope_path; - - /// @brief The version of the global scope this thread is synced with. - size_t sync_version = 0; - }; - - /// @brief Thread-local storage for profiling data. - mutable tbb::enumerable_thread_specific m_thread_data; - - /// @brief Helper to combine all thread-local data into one JSON object. - nlohmann::json combine_data() const; - - /// @brief Helper to recursively merge two JSON objects. - static void - merge_json(nlohmann::json& target, const nlohmann::json& source); + /// @brief Access the profiling data as a JSON object. + nlohmann::json& data() { return m_data; } - // --- Synchronization --- - std::thread::id m_main_thread_id; - ThreadLocalData* m_main_thread_data = nullptr; +protected: + /// @brief The profiling data stored as a JSON object. + nlohmann::json m_data; - // Sequence lock version: Even = Stable, Odd = Modifying - std::atomic m_scope_version { 0 }; + /// @brief The global scope pointer into the JSON data. + nlohmann::json::json_pointer current_scope; }; Profiler& profiler(); From dbe7341291cdf444457817c734f3df75f22737f1 Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Tue, 28 Apr 2026 11:56:51 -0400 Subject: [PATCH 3/9] Add profiler instrumentation to core pipeline and potentials - Insert IPC_TOOLKIT_PROFILE_BLOCKs in collision, candidate, and potential build and evaluation routines for detailed profiling - Add name() method to all Potential subclasses for block labeling - Extend profiler test to cover full IPC pipeline on sample mesh data - Update includes to ensure profiler header is available where needed --- src/ipc/candidates/candidates.cpp | 6 + .../collisions/normal/normal_collisions.cpp | 4 + .../tangential/tangential_collisions.cpp | 4 + src/ipc/ipc.cpp | 5 + src/ipc/potentials/barrier_potential.hpp | 2 + src/ipc/potentials/friction_potential.hpp | 2 + .../potentials/normal_adhesion_potential.hpp | 2 + src/ipc/potentials/potential.cpp | 136 +++++++++++------- src/ipc/potentials/potential.hpp | 5 + .../tangential_adhesion_potential.hpp | 5 + tests/src/tests/utils/test_profiler.cpp | 80 ++++++++++- 11 files changed, 195 insertions(+), 56 deletions(-) diff --git a/src/ipc/candidates/candidates.cpp b/src/ipc/candidates/candidates.cpp index 8f23e102a..b7800708a 100644 --- a/src/ipc/candidates/candidates.cpp +++ b/src/ipc/candidates/candidates.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -45,6 +46,8 @@ void Candidates::build( const double inflation_radius, BroadPhase* broad_phase) { + IPC_TOOLKIT_PROFILE_BLOCK("Candidates::build(static)"); + std::unique_ptr default_broad_phase; if (broad_phase == nullptr) { default_broad_phase = make_default_broad_phase(); @@ -130,6 +133,8 @@ void Candidates::build( const double inflation_radius, BroadPhase* broad_phase) { + IPC_TOOLKIT_PROFILE_BLOCK("Candidates::build(dynamic)"); + std::unique_ptr default_broad_phase; if (broad_phase == nullptr) { default_broad_phase = make_default_broad_phase(); @@ -255,6 +260,7 @@ double Candidates::compute_collision_free_stepsize( { assert(vertices_t0.rows() == mesh.num_vertices()); assert(vertices_t1.rows() == mesh.num_vertices()); + IPC_TOOLKIT_PROFILE_BLOCK("Candidates::compute_collision_free_stepsize"); if (empty()) { return 1; // No possible collisions, so can take full step. diff --git a/src/ipc/collisions/normal/normal_collisions.cpp b/src/ipc/collisions/normal/normal_collisions.cpp index 92fdc2cfe..5e061e90d 100644 --- a/src/ipc/collisions/normal/normal_collisions.cpp +++ b/src/ipc/collisions/normal/normal_collisions.cpp @@ -3,6 +3,8 @@ #include #include +#include + #include #include #include @@ -24,6 +26,7 @@ void NormalCollisions::build( BroadPhase* broad_phase) { assert(vertices.rows() == mesh.num_vertices()); + IPC_TOOLKIT_PROFILE_BLOCK("NormalCollisions::build"); const double inflation_radius = 0.5 * (dhat + dmin); @@ -41,6 +44,7 @@ void NormalCollisions::build( const double dmin) { assert(vertices.rows() == mesh.num_vertices()); + IPC_TOOLKIT_PROFILE_BLOCK("NormalCollisions::build(candidates)"); clear(); diff --git a/src/ipc/collisions/tangential/tangential_collisions.cpp b/src/ipc/collisions/tangential/tangential_collisions.cpp index 85702e546..e1fd54f1a 100644 --- a/src/ipc/collisions/tangential/tangential_collisions.cpp +++ b/src/ipc/collisions/tangential/tangential_collisions.cpp @@ -8,6 +8,8 @@ #include #include +#include + #include // std::out_of_range #include @@ -69,6 +71,7 @@ void TangentialCollisions::build( { assert(mu_s.size() == vertices.rows()); assert(mu_k.size() == vertices.rows()); + IPC_TOOLKIT_PROFILE_BLOCK("TangentialCollisions::build"); const Eigen::MatrixXi& edges = mesh.edges(); const Eigen::MatrixXi& faces = mesh.faces(); @@ -180,6 +183,7 @@ void TangentialCollisions::build( { assert(mu_k.size() == vertices.rows()); assert(mu_s.size() == vertices.rows()); + IPC_TOOLKIT_PROFILE_BLOCK("TangentialCollisions::build(smooth)"); const Eigen::MatrixXi& edges = mesh.edges(); const Eigen::MatrixXi& faces = mesh.faces(); diff --git a/src/ipc/ipc.cpp b/src/ipc/ipc.cpp index b34c09238..b838f33db 100644 --- a/src/ipc/ipc.cpp +++ b/src/ipc/ipc.cpp @@ -11,6 +11,8 @@ #include #endif +#include + #include namespace ipc { @@ -25,6 +27,7 @@ bool is_step_collision_free( { assert(vertices_t0.rows() == mesh.num_vertices()); assert(vertices_t1.rows() == mesh.num_vertices()); + IPC_TOOLKIT_PROFILE_BLOCK("is_step_collision_free"); // Broad phase Candidates candidates; @@ -49,6 +52,7 @@ double compute_collision_free_stepsize( { assert(vertices_t0.rows() == mesh.num_vertices()); assert(vertices_t1.rows() == mesh.num_vertices()); + IPC_TOOLKIT_PROFILE_BLOCK("compute_collision_free_stepsize"); std::unique_ptr default_broad_phase; if (broad_phase == nullptr) { @@ -104,6 +108,7 @@ bool has_intersections( BroadPhase* broad_phase) { assert(vertices.rows() == mesh.num_vertices()); + IPC_TOOLKIT_PROFILE_BLOCK("has_intersections"); std::unique_ptr default_broad_phase; if (broad_phase == nullptr) { diff --git a/src/ipc/potentials/barrier_potential.hpp b/src/ipc/potentials/barrier_potential.hpp index 06611355f..bf5e23a70 100644 --- a/src/ipc/potentials/barrier_potential.hpp +++ b/src/ipc/potentials/barrier_potential.hpp @@ -74,6 +74,8 @@ class BarrierPotential : public NormalPotential { m_barrier = barrier; } + std::string name() const override { return "BarrierPotential"; } + using Super::operator(); using Super::gradient; using Super::hessian; diff --git a/src/ipc/potentials/friction_potential.hpp b/src/ipc/potentials/friction_potential.hpp index cf0f1afea..54f473827 100644 --- a/src/ipc/potentials/friction_potential.hpp +++ b/src/ipc/potentials/friction_potential.hpp @@ -24,6 +24,8 @@ class FrictionPotential : public TangentialPotential { m_eps_v = eps_v; } + std::string name() const override { return "FrictionPotential"; } + protected: /// @brief Compute the value of the ∫ μ(y) f₁(y) dy, where f₁ is the first derivative of the smooth mollifier. /// @param x The tangential relative speed. diff --git a/src/ipc/potentials/normal_adhesion_potential.hpp b/src/ipc/potentials/normal_adhesion_potential.hpp index bb55aa649..92cdf4872 100644 --- a/src/ipc/potentials/normal_adhesion_potential.hpp +++ b/src/ipc/potentials/normal_adhesion_potential.hpp @@ -15,6 +15,8 @@ class NormalAdhesionPotential : public NormalPotential { const double _Y, const double _eps_c); + std::string name() const override { return "NormalAdhesionPotential"; } + using Super::operator(); using Super::gradient; using Super::hessian; diff --git a/src/ipc/potentials/potential.cpp b/src/ipc/potentials/potential.cpp index 44f1fbe91..9451a2d88 100644 --- a/src/ipc/potentials/potential.cpp +++ b/src/ipc/potentials/potential.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -39,6 +40,7 @@ double Potential::operator()( Eigen::ConstRef X) const { assert(X.rows() == mesh.num_vertices()); + IPC_TOOLKIT_PROFILE_BLOCK(this->name() + "::operator()"); return tbb::parallel_reduce( tbb::blocked_range(size_t(0), collisions.size()), 0.0, @@ -61,6 +63,7 @@ Eigen::VectorXd Potential::gradient( Eigen::ConstRef X) const { assert(X.rows() == mesh.num_vertices()); + IPC_TOOLKIT_PROFILE_BLOCK(this->name() + "::gradient()"); if (collisions.empty()) { return Eigen::VectorXd::Zero(X.size()); @@ -70,26 +73,32 @@ Eigen::VectorXd Potential::gradient( tbb::combinable grad(Eigen::VectorXd::Zero(X.size())); - tbb::parallel_for( - tbb::blocked_range(size_t(0), collisions.size()), - [&](const tbb::blocked_range& r) { - for (size_t i = r.begin(); i < r.end(); i++) { - const TCollision& collision = collisions[i]; - - const VectorMaxNd local_grad = this->gradient( - collision, collision.dof(X, mesh.edges(), mesh.faces())); - - const std::array vids = - collision.vertex_ids(mesh.edges(), mesh.faces()); - - local_gradient_to_global_gradient( - local_grad, vids, dim, grad.local()); - } - }); + { + IPC_TOOLKIT_PROFILE_BLOCK("compute local gradients"); + tbb::parallel_for( + tbb::blocked_range(size_t(0), collisions.size()), + [&](const tbb::blocked_range& r) { + for (size_t i = r.begin(); i < r.end(); i++) { + const TCollision& collision = collisions[i]; + + const VectorMaxNd local_grad = this->gradient( + collision, + collision.dof(X, mesh.edges(), mesh.faces())); + + const std::array vids = + collision.vertex_ids(mesh.edges(), mesh.faces()); + + local_gradient_to_global_gradient( + local_grad, vids, dim, grad.local()); + } + }); + } - return grad.combine([](const Eigen::VectorXd& a, const Eigen::VectorXd& b) { - return a + b; - }); + { + IPC_TOOLKIT_PROFILE_BLOCK("combine local gradients"); + return grad.combine([](const Eigen::VectorXd& a, + const Eigen::VectorXd& b) { return a + b; }); + } } template @@ -100,6 +109,7 @@ Eigen::SparseMatrix Potential::hessian( const PSDProjectionMethod project_hessian_to_psd) const { assert(X.rows() == mesh.num_vertices()); + IPC_TOOLKIT_PROFILE_BLOCK(this->name() + "::hessian()"); if (collisions.empty()) { return Eigen::SparseMatrix(X.size(), X.size()); @@ -117,28 +127,30 @@ Eigen::SparseMatrix Potential::hessian( tbb::enumerable_thread_specific storage( LocalThreadMatStorage(buffer_size, ndof, ndof)); - tbb::parallel_for( - tbb::blocked_range(0, collisions.size()), - [&](const tbb::blocked_range& r) { - auto& hess_triplets = storage.local(); + { + IPC_TOOLKIT_PROFILE_BLOCK("compute local hessians and triplets"); + tbb::parallel_for( + tbb::blocked_range(0, collisions.size()), + [&](const tbb::blocked_range& r) { + auto& hess_triplets = storage.local(); - for (size_t i = r.begin(); i < r.end(); i++) { + for (size_t i = r.begin(); i < r.end(); i++) { - const TCollision& collision = collisions[i]; + const TCollision& collision = collisions[i]; - const MatrixMaxNd local_hess = this->hessian( - collisions[i], collisions[i].dof(X, edges, faces), - project_hessian_to_psd); + const MatrixMaxNd local_hess = this->hessian( + collisions[i], collisions[i].dof(X, edges, faces), + project_hessian_to_psd); - const std::array vids = - collision.vertex_ids(edges, faces); - - local_hessian_to_global_triplets( - local_hess, vids, dim, *(hess_triplets.cache), - mesh.num_vertices()); - } - }); + const std::array vids = + collision.vertex_ids(edges, faces); + local_hessian_to_global_triplets( + local_hess, vids, dim, *(hess_triplets.cache), + mesh.num_vertices()); + } + }); + } if (storage.empty()) { return Eigen::SparseMatrix(); } @@ -146,9 +158,12 @@ Eigen::SparseMatrix Potential::hessian( // Assemble the stiffness matrix by concatenating the tuples in each local // storage - tbb::parallel_for_each( - storage.begin(), storage.end(), - [](const auto& local_storage) { local_storage.cache->prune(); }); + { + IPC_TOOLKIT_PROFILE_BLOCK("prune local storages"); + tbb::parallel_for_each( + storage.begin(), storage.end(), + [](const auto& local_storage) { local_storage.cache->prune(); }); + } // Prepares for parallel concatenation std::vector offsets(storage.size()); @@ -178,26 +193,37 @@ Eigen::SparseMatrix Potential::hessian( return hess; } - triplets.resize(triplet_count); + // Allocate triplets + { + IPC_TOOLKIT_PROFILE_BLOCK("allocate triplets"); + triplets.resize(triplet_count); + } // Parallel copy into triplets - tbb::parallel_for(size_t(0), storage.size(), [&](size_t i) { - const SparseMatrixCache& cache = dynamic_cast( - *((storage.begin() + i)->cache)); - size_t offset = offsets[i]; - - std::copy( - cache.entries().begin(), cache.entries().end(), - triplets.begin() + offset); - offset += cache.entries().size(); - - if (cache.mat().nonZeros() > 0) { - set_triplets(cache.mat(), triplets, offset); - } - }); + { + IPC_TOOLKIT_PROFILE_BLOCK("parallel copy into triplets"); + tbb::parallel_for(size_t(0), storage.size(), [&](size_t i) { + const SparseMatrixCache& cache = + dynamic_cast( + *((storage.begin() + i)->cache)); + size_t offset = offsets[i]; + + std::copy( + cache.entries().begin(), cache.entries().end(), + triplets.begin() + offset); + offset += cache.entries().size(); + + if (cache.mat().nonZeros() > 0) { + set_triplets(cache.mat(), triplets, offset); + } + }); + } // Sort and assemble - hess.setFromTriplets(triplets.begin(), triplets.end()); + { + IPC_TOOLKIT_PROFILE_BLOCK("assemble hessian from triplets"); + hess.setFromTriplets(triplets.begin(), triplets.end()); + } return hess; } diff --git a/src/ipc/potentials/potential.hpp b/src/ipc/potentials/potential.hpp index cedde81bd..2c6c42785 100644 --- a/src/ipc/potentials/potential.hpp +++ b/src/ipc/potentials/potential.hpp @@ -3,6 +3,8 @@ #include #include +#include + namespace ipc { /// @brief Base class for potentials. @@ -19,6 +21,9 @@ template class Potential { Potential() = default; virtual ~Potential() = default; + /// @brief The name of this potential (used for profiling). + virtual std::string name() const = 0; + // -- Cumulative methods --------------------------------------------------- /// @brief Compute the potential for a set of collisions. diff --git a/src/ipc/potentials/tangential_adhesion_potential.hpp b/src/ipc/potentials/tangential_adhesion_potential.hpp index c3290e4d4..5ecf4f15b 100644 --- a/src/ipc/potentials/tangential_adhesion_potential.hpp +++ b/src/ipc/potentials/tangential_adhesion_potential.hpp @@ -24,6 +24,11 @@ class TangentialAdhesionPotential : public TangentialPotential { m_eps_a = eps_a; } + std::string name() const override + { + return "TangentialAdhesionPotential"; + } + protected: /// @brief Compute the value of the ∫ μ(y) f₁(y) dy, where f₁ is the first derivative of the smooth mollifier. /// @param x The tangential relative speed. diff --git a/tests/src/tests/utils/test_profiler.cpp b/tests/src/tests/utils/test_profiler.cpp index 1d9c72e0f..09b466d9f 100644 --- a/tests/src/tests/utils/test_profiler.cpp +++ b/tests/src/tests/utils/test_profiler.cpp @@ -1,7 +1,18 @@ -#include +#include #ifdef IPC_TOOLKIT_WITH_PROFILER +#include +#include +#include +#include +#include +#include +#include + +#include +#include + #include #include #include @@ -85,4 +96,71 @@ TEST_CASE("Profiler", "[profiler]") CHECK(block4["count"].get() == num_threads); } +TEST_CASE("Profile full pipeline", "[!benchmark][profiler]") +{ + // const std::string mesh_t0 = "cloth_ball92.ply"; + // const std::string mesh_t1 = "cloth_ball93.ply"; + const std::string mesh_t0 = "rod-twist/3036.ply"; + const std::string mesh_t1 = "rod-twist/3037.ply"; + + Eigen::MatrixXd V0_full, V1_full; + Eigen::MatrixXi F0, F1; + + const bool loaded_t0 = igl::read_triangle_mesh( + (ipc::tests::DATA_DIR / mesh_t0).string(), V0_full, F0); + const bool loaded_t1 = igl::read_triangle_mesh( + (ipc::tests::DATA_DIR / mesh_t1).string(), V1_full, F1); + + if (!loaded_t0 || !loaded_t1) { + WARN("Skipping profiler test: puffer-ball data not found"); + return; + } + + REQUIRE(F0.rows() == F1.rows()); + REQUIRE(V0_full.rows() == V1_full.rows()); + + Eigen::MatrixXi E; + igl::edges(F0, E); + + CollisionMesh mesh = CollisionMesh::build_from_full_mesh(V0_full, E, F0); + Eigen::MatrixXd V0 = mesh.vertices(V0_full); + Eigen::MatrixXd V1 = mesh.vertices(V1_full); + + profiler().reset(); + + const double dhat = 1e-3; + const double mu = 0.3; + const double eps_v = 1e-3; + + // CCD step size + const double step = compute_collision_free_stepsize( + mesh, V0, V1, 0.0, nullptr, AdditiveCCD()); + + // Normal collisions and barrier potential + NormalCollisions collisions; + collisions.build(mesh, V0, dhat); + + BarrierPotential bp(dhat, /*stiffness=*/1e4); + bp(collisions, mesh, V0); + bp.gradient(collisions, mesh, V0); + bp.hessian(collisions, mesh, V0); + + // Tangential (friction) collisions and friction potential + TangentialCollisions tangential; + tangential.build(mesh, V0, collisions, bp, mu); + + FrictionPotential fp(eps_v); + fp(tangential, mesh, V0); + fp.gradient(tangential, mesh, V0); + fp.hessian(tangential, mesh, V0); + + const std::string output = "profiler_output.csv"; + profiler().write_csv(output); + + logger().info( + "Profiler output written to: {}\n" + " vertices: {}, faces: {}, collisions: {}, step_size: {:.6g}\n", + output, V0.rows(), F0.rows(), collisions.size(), step); +} + #endif \ No newline at end of file From 3f78b30fd6e7d3ab3d37711488a8bdbaab5ad43f Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Tue, 28 Apr 2026 17:21:15 -0400 Subject: [PATCH 4/9] Make profiler single-threaded and update tests - Restrict Profiler to record only from the main thread for accurate timings - Add m_main_thread_id and m_current_scope to Profiler for thread tracking - Ignore profiling calls from non-main threads - Update tests to reflect single-threaded counting and aggregation - Refactor test to check counts and timings for nested blocks - Minor cleanup and improved comments --- src/ipc/potentials/potential.cpp | 31 ++++++++++------ src/ipc/utils/profiler.cpp | 49 ++++++++++++++++--------- src/ipc/utils/profiler.hpp | 7 +++- tests/src/tests/utils/test_profiler.cpp | 39 ++++++++++++++++---- 4 files changed, 89 insertions(+), 37 deletions(-) diff --git a/src/ipc/potentials/potential.cpp b/src/ipc/potentials/potential.cpp index 9451a2d88..50c6e204d 100644 --- a/src/ipc/potentials/potential.cpp +++ b/src/ipc/potentials/potential.cpp @@ -133,21 +133,28 @@ Eigen::SparseMatrix Potential::hessian( tbb::blocked_range(0, collisions.size()), [&](const tbb::blocked_range& r) { auto& hess_triplets = storage.local(); - for (size_t i = r.begin(); i < r.end(); i++) { - const TCollision& collision = collisions[i]; - const MatrixMaxNd local_hess = this->hessian( - collisions[i], collisions[i].dof(X, edges, faces), - project_hessian_to_psd); - - const std::array vids = - collision.vertex_ids(edges, faces); - - local_hessian_to_global_triplets( - local_hess, vids, dim, *(hess_triplets.cache), - mesh.num_vertices()); + MatrixMaxNd local_hess; + { + // IPC_TOOLKIT_PROFILE_BLOCK("compute local hessian"); + local_hess = this->hessian( + collision, collision.dof(X, edges, faces), + project_hessian_to_psd); + } + + { + // IPC_TOOLKIT_PROFILE_BLOCK( + // "map local hessian to global triplets"); + + const std::array + vids = collision.vertex_ids(edges, faces); + + local_hessian_to_global_triplets( + local_hess, vids, dim, *(hess_triplets.cache), + mesh.num_vertices()); + } } }); } diff --git a/src/ipc/utils/profiler.cpp b/src/ipc/utils/profiler.cpp index 12bbfaeb6..a5e61d7b2 100644 --- a/src/ipc/utils/profiler.cpp +++ b/src/ipc/utils/profiler.cpp @@ -7,7 +7,7 @@ namespace ipc { -Profiler::Profiler() { } +Profiler::Profiler() : m_main_thread_id(std::this_thread::get_id()) { } Profiler& profiler() { @@ -15,13 +15,22 @@ Profiler& profiler() return instance; } -void Profiler::clear() { m_data.clear(); } +void Profiler::clear() +{ + m_data = nlohmann::json::object(); + m_current_scope = nlohmann::json::json_pointer(); // root +} void Profiler::start(const std::string& name) { - current_scope.push_back(name); - if (!m_data.contains(current_scope)) { - m_data[current_scope] = { + if (std::this_thread::get_id() != m_main_thread_id) { + return; + } + + m_current_scope.push_back(name); + + if (!m_data.contains(m_current_scope)) { + m_data[m_current_scope] = { { "time_ms", 0 }, { "count", 0 }, }; @@ -30,28 +39,34 @@ void Profiler::start(const std::string& name) void Profiler::stop(const double time_ms) { + if (std::this_thread::get_id() != m_main_thread_id) { + return; + } + const static std::string log_fmt_text = fmt::format( "[{}] {{}} {{:.6f}} ms", fmt::format(fmt::fg(fmt::terminal_color::magenta), "timing")); logger().trace( - fmt::runtime(log_fmt_text), current_scope.to_string(), time_ms); - - assert(m_data.contains(current_scope)); - assert(m_data.at(current_scope).contains("time_ms")); - assert(m_data.at(current_scope).contains("count")); - m_data[current_scope]["time_ms"] = - m_data[current_scope]["time_ms"].get() + time_ms; - m_data[current_scope]["count"] = - m_data[current_scope]["count"].get() + 1; - current_scope.pop_back(); + fmt::runtime(log_fmt_text), m_current_scope.to_string(), time_ms); + + assert(m_data.contains(m_current_scope)); + assert(m_data.at(m_current_scope).contains("time_ms")); + assert(m_data.at(m_current_scope).contains("count")); + m_data[m_current_scope]["time_ms"] = + m_data[m_current_scope]["time_ms"].get() + time_ms; + m_data[m_current_scope]["count"] = + m_data[m_current_scope]["count"].get() + 1; + + m_current_scope.pop_back(); } void Profiler::reset() { + m_main_thread_id = std::this_thread::get_id(); m_data.clear(); // reset the calling thread's scope - current_scope = nlohmann::json::json_pointer(); // root + m_current_scope = nlohmann::json::json_pointer(); // root } void Profiler::print() const @@ -118,4 +133,4 @@ void Profiler::write_csv(std::ostream& os) const } // namespace ipc -#endif \ No newline at end of file +#endif diff --git a/src/ipc/utils/profiler.hpp b/src/ipc/utils/profiler.hpp index 2f560247e..5f365bc96 100644 --- a/src/ipc/utils/profiler.hpp +++ b/src/ipc/utils/profiler.hpp @@ -13,6 +13,7 @@ #include #include +#include // Helper macro to stringify/paste after expansion #define IPC_TOOLKIT_PROFILE_BLOCK_CONCAT_IMPL(a, b) a##b @@ -68,7 +69,11 @@ class Profiler { nlohmann::json m_data; /// @brief The global scope pointer into the JSON data. - nlohmann::json::json_pointer current_scope; + nlohmann::json::json_pointer m_current_scope; + + /// @brief The thread that records data; calls from all other threads are + /// silently ignored, giving a single-thread estimate of block costs. + std::thread::id m_main_thread_id; }; Profiler& profiler(); diff --git a/tests/src/tests/utils/test_profiler.cpp b/tests/src/tests/utils/test_profiler.cpp index 09b466d9f..a79855c48 100644 --- a/tests/src/tests/utils/test_profiler.cpp +++ b/tests/src/tests/utils/test_profiler.cpp @@ -60,24 +60,34 @@ TEST_CASE("Profiler", "[profiler]") REQUIRE(data.contains("Block 2")); nlohmann::json block2 = data.at("Block 2"); CHECK(block2.size() == 2); // count, time_ms - CHECK(block2["count"].get() == num_threads); + // Only the coordinator thread's iterations are recorded + CHECK(block2["count"].get() >= 1); // --------------------------------------------------------------------- - { + auto foo = []() { IPC_TOOLKIT_PROFILE_BLOCK("Block 3"); + IPC_TOOLKIT_PROFILE_BLOCK("Block 4"); tbb::parallel_for(0, num_threads, [&](int) { // for (int i = 0; i < num_threads; ++i) { - IPC_TOOLKIT_PROFILE_BLOCK("Block 4"); { IPC_TOOLKIT_PROFILE_BLOCK("Block 5"); std::this_thread::sleep_for( std::chrono::milliseconds(sleep_time_ms / num_threads)); } + + { + IPC_TOOLKIT_PROFILE_BLOCK("Block 6"); + std::this_thread::sleep_for( + std::chrono::milliseconds(sleep_time_ms / num_threads)); + } // } }); - } + }; + + foo(); + foo(); // Run it twice to check that counts are aggregated correctly data = profiler().data(); profiler().print(); @@ -88,12 +98,27 @@ TEST_CASE("Profiler", "[profiler]") CAPTURE(!data.contains("Block 4")); nlohmann::json block3 = data.at("Block 3"); CHECK(block3.size() == 3); // count, time_ms, Block 4 - CHECK(block3["count"].get() == 1); + CHECK(block3["count"].get() == 2); REQUIRE(block3.contains("Block 4")); nlohmann::json block4 = block3.at("Block 4"); - CHECK(block4.size() == 3); // count, time_ms - CHECK(block4["count"].get() == num_threads); + + REQUIRE(block4.contains("Block 5")); + nlohmann::json block5 = block4.at("Block 5"); + CHECK(block5.size() == 2); // count, time_ms + // foo() runs twice; coordinator records at least once per call + CHECK(block5["count"].get() >= 2); + CHECK(block5["time_ms"].get() < block4["time_ms"].get()); + + REQUIRE(block4.contains("Block 6")); + nlohmann::json block6 = block4.at("Block 6"); + CHECK(block6.size() == 2); // count, time_ms + CHECK(block6["count"].get() >= 2); + CHECK(block6["time_ms"].get() < block4["time_ms"].get()); + + CHECK( + block5["time_ms"].get() + block6["time_ms"].get() + < block4["time_ms"].get()); } TEST_CASE("Profile full pipeline", "[!benchmark][profiler]") From 176be4dd6e17b549b450be71131fee2c7a835634 Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Thu, 30 Apr 2026 11:50:08 -0400 Subject: [PATCH 5/9] Add profiler blocks to potential Hessian and profiler test - Wrap local Hessian computation and mapping in IPC_TOOLKIT_PROFILE_BLOCK - Adjust profiler test to profile parallel region as a single block --- src/ipc/potentials/potential.cpp | 20 +++++++++----- tests/src/tests/utils/test_profiler.cpp | 35 +++++++++++++------------ 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/src/ipc/potentials/potential.cpp b/src/ipc/potentials/potential.cpp index 1c4610eeb..839ae594c 100644 --- a/src/ipc/potentials/potential.cpp +++ b/src/ipc/potentials/potential.cpp @@ -127,13 +127,21 @@ Eigen::SparseMatrix Potential::hessian( const TCollision& collision = collisions[i]; - const MatrixMaxNd local_hess = this->hessian( - collisions[i], collisions[i].dof(X, edges, faces), - project_hessian_to_psd); + MatrixMaxNd local_hess; + { + IPC_TOOLKIT_PROFILE_BLOCK("compute local hessian"); + local_hess = this->hessian( + collision, collision.dof(X, edges, faces), + project_hessian_to_psd); + } - local_hessian_to_global_triplets( - local_hess, collision.vertex_ids(edges, faces), dim, - *(hess_triplets.cache), mesh.num_vertices()); + { + IPC_TOOLKIT_PROFILE_BLOCK( + "map local hessian to global triplets"); + local_hessian_to_global_triplets( + local_hess, collision.vertex_ids(edges, faces), dim, + *(hess_triplets.cache), mesh.num_vertices()); + } }); } if (storage.empty()) { diff --git a/tests/src/tests/utils/test_profiler.cpp b/tests/src/tests/utils/test_profiler.cpp index a79855c48..7d3e10363 100644 --- a/tests/src/tests/utils/test_profiler.cpp +++ b/tests/src/tests/utils/test_profiler.cpp @@ -67,23 +67,24 @@ TEST_CASE("Profiler", "[profiler]") auto foo = []() { IPC_TOOLKIT_PROFILE_BLOCK("Block 3"); - - IPC_TOOLKIT_PROFILE_BLOCK("Block 4"); - tbb::parallel_for(0, num_threads, [&](int) { - // for (int i = 0; i < num_threads; ++i) { - { - IPC_TOOLKIT_PROFILE_BLOCK("Block 5"); - std::this_thread::sleep_for( - std::chrono::milliseconds(sleep_time_ms / num_threads)); - } - - { - IPC_TOOLKIT_PROFILE_BLOCK("Block 6"); - std::this_thread::sleep_for( - std::chrono::milliseconds(sleep_time_ms / num_threads)); - } - // } - }); + { + IPC_TOOLKIT_PROFILE_BLOCK("Block 4"); + tbb::parallel_for(0, num_threads, [&](int) { + // for (int i = 0; i < num_threads; ++i) { + { + IPC_TOOLKIT_PROFILE_BLOCK("Block 5"); + std::this_thread::sleep_for( + std::chrono::milliseconds(sleep_time_ms / num_threads)); + } + + { + IPC_TOOLKIT_PROFILE_BLOCK("Block 6"); + std::this_thread::sleep_for( + std::chrono::milliseconds(sleep_time_ms / num_threads)); + } + // } + }); + } }; foo(); From 6a33b1c4b68be831571d372635d75e05d022be1b Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Tue, 5 May 2026 21:00:10 -0400 Subject: [PATCH 6/9] Apply clang-format --- src/ipc/collisions/normal/normal_collisions.cpp | 1 - src/ipc/collisions/tangential/tangential_collisions.cpp | 3 +-- src/ipc/potentials/tangential_adhesion_potential.hpp | 5 +---- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/ipc/collisions/normal/normal_collisions.cpp b/src/ipc/collisions/normal/normal_collisions.cpp index 140eed5ab..d24630e06 100644 --- a/src/ipc/collisions/normal/normal_collisions.cpp +++ b/src/ipc/collisions/normal/normal_collisions.cpp @@ -2,7 +2,6 @@ #include #include - #include #include diff --git a/src/ipc/collisions/tangential/tangential_collisions.cpp b/src/ipc/collisions/tangential/tangential_collisions.cpp index e1fd54f1a..1946afcc3 100644 --- a/src/ipc/collisions/tangential/tangential_collisions.cpp +++ b/src/ipc/collisions/tangential/tangential_collisions.cpp @@ -3,13 +3,12 @@ #include #include #include +#include #include #include #include -#include - #include // std::out_of_range #include diff --git a/src/ipc/potentials/tangential_adhesion_potential.hpp b/src/ipc/potentials/tangential_adhesion_potential.hpp index 5ecf4f15b..ee5250f8f 100644 --- a/src/ipc/potentials/tangential_adhesion_potential.hpp +++ b/src/ipc/potentials/tangential_adhesion_potential.hpp @@ -24,10 +24,7 @@ class TangentialAdhesionPotential : public TangentialPotential { m_eps_a = eps_a; } - std::string name() const override - { - return "TangentialAdhesionPotential"; - } + std::string name() const override { return "TangentialAdhesionPotential"; } protected: /// @brief Compute the value of the ∫ μ(y) f₁(y) dy, where f₁ is the first derivative of the smooth mollifier. From eff0527e4a701e2aa3bb8633018b6211487ee8fa Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Tue, 5 May 2026 21:08:00 -0400 Subject: [PATCH 7/9] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- tests/src/tests/utils/test_profiler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/src/tests/utils/test_profiler.cpp b/tests/src/tests/utils/test_profiler.cpp index 7d3e10363..ec1c84f92 100644 --- a/tests/src/tests/utils/test_profiler.cpp +++ b/tests/src/tests/utils/test_profiler.cpp @@ -96,7 +96,7 @@ TEST_CASE("Profiler", "[profiler]") CHECK(data.size() == 3); REQUIRE(data.contains("Block 3")); - CAPTURE(!data.contains("Block 4")); + CHECK_FALSE(data.contains("Block 4")); nlohmann::json block3 = data.at("Block 3"); CHECK(block3.size() == 3); // count, time_ms, Block 4 CHECK(block3["count"].get() == 2); From 37ce63296bfe569999c13fb5d0fa1f2305bd2c6c Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Tue, 5 May 2026 21:08:19 -0400 Subject: [PATCH 8/9] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- tests/src/tests/utils/test_profiler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/src/tests/utils/test_profiler.cpp b/tests/src/tests/utils/test_profiler.cpp index ec1c84f92..c24cb20b7 100644 --- a/tests/src/tests/utils/test_profiler.cpp +++ b/tests/src/tests/utils/test_profiler.cpp @@ -138,7 +138,7 @@ TEST_CASE("Profile full pipeline", "[!benchmark][profiler]") (ipc::tests::DATA_DIR / mesh_t1).string(), V1_full, F1); if (!loaded_t0 || !loaded_t1) { - WARN("Skipping profiler test: puffer-ball data not found"); + WARN("Skipping profiler test: rod-twist data not found"); return; } From f8582172ae58ac1b3b7a13674a8b66d4b447ba47 Mon Sep 17 00:00:00 2001 From: Zachary Ferguson Date: Tue, 5 May 2026 21:20:50 -0400 Subject: [PATCH 9/9] Record profiling only on main thread - Add Profiler::is_recording_thread() to check current thread id - Make ProfilePoint store m_active and skip start/stop when not on the main thread --- src/ipc/utils/profiler.hpp | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/ipc/utils/profiler.hpp b/src/ipc/utils/profiler.hpp index 5f365bc96..766fd2097 100644 --- a/src/ipc/utils/profiler.hpp +++ b/src/ipc/utils/profiler.hpp @@ -64,6 +64,11 @@ class Profiler { /// @brief Access the profiling data as a JSON object. nlohmann::json& data() { return m_data; } + bool is_recording_thread() const + { + return std::this_thread::get_id() == m_main_thread_id; + } + protected: /// @brief The profiling data stored as a JSON object. nlohmann::json m_data; @@ -101,20 +106,26 @@ template class ProfilePoint { ProfilePoint(Profiler& p_profiler, const std::string& name) : m_profiler(p_profiler) + , m_active(p_profiler.is_recording_thread()) { - m_profiler.start(name); - timer.start(); + if (m_active) { + m_profiler.start(name); + timer.start(); + } } ~ProfilePoint() { - timer.stop(); - m_profiler.stop(timer.getElapsedTimeInMilliSec()); + if (m_active) { + timer.stop(); + m_profiler.stop(timer.getElapsedTimeInMilliSec()); + } } protected: Profiler& m_profiler; Timer timer; + bool m_active; }; } // namespace ipc