From d6554411641b9ca8a56f2842208fcef4942c174b Mon Sep 17 00:00:00 2001 From: Naitik Pahwa Date: Tue, 17 Mar 2026 02:03:11 +0530 Subject: [PATCH 1/4] Add vendor-agnostic wgpu compute backend Signed-off-by: Naitik Pahwa --- dave_interfaces/CMakeLists.txt | 2 - .../multibeam_sonar/CMakeLists.txt | 191 ++- .../multibeam_sonar/MultibeamSonarSensor.cc | 279 +++- .../multibeam_sonar/MultibeamSonarSensor.hh | 18 +- .../multibeam_sonar/package.xml | 3 +- .../multibeam_sonar/sonar_compute_backend.hh | 83 ++ .../multibeam_sonar/sonar_compute_cpu.cc | 179 +++ .../multibeam_sonar/sonar_compute_wgpu.cc | 214 +++ .../multibeam_sonar/sonar_compute_wgpu.hh | 28 + .../multibeam_sonar_demo/CMakeLists.txt | 28 +- .../launch/multibeam_sonar_demo.launch.py | 11 +- .../multibeam_sonar_demo/package.xml | 2 + .../multibeam_sonar_demo/scripts/plotdata.py | 34 +- .../multibeam_sonar_system/CMakeLists.txt | 78 +- .../multibeam_sonar_system/package.xml | 2 + .../wgpu_vendor/CMakeLists.txt | 55 + .../wgpu_vendor/package.xml | 12 + .../wgpu_vendor/sonar_wgpu_rust/Cargo.lock | 1156 +++++++++++++++++ .../wgpu_vendor/sonar_wgpu_rust/Cargo.toml | 17 + .../wgpu_vendor/sonar_wgpu_rust/src/fft.rs | 268 ++++ .../wgpu_vendor/sonar_wgpu_rust/src/lib.rs | 581 +++++++++ .../sonar_wgpu_rust/src/pipeline.rs | 424 ++++++ .../src/shaders/backscatter.wgsl | 179 +++ .../sonar_wgpu_rust/src/shaders/convert.wgsl | 15 + .../sonar_wgpu_rust/src/shaders/fft.wgsl | 86 ++ .../sonar_wgpu_rust/src/shaders/matmul.wgsl | 71 + .../description/blueview_p900/model.sdf | 2 + 27 files changed, 3770 insertions(+), 248 deletions(-) create mode 100644 gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_backend.hh create mode 100644 gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cpu.cc create mode 100644 gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_wgpu.cc create mode 100644 gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_wgpu.hh create mode 100644 gazebo/dave_gz_multibeam_sonar/wgpu_vendor/CMakeLists.txt create mode 100644 gazebo/dave_gz_multibeam_sonar/wgpu_vendor/package.xml create mode 100644 gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/Cargo.lock create mode 100644 gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/Cargo.toml create mode 100644 gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/fft.rs create mode 100644 gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/lib.rs create mode 100644 gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/pipeline.rs create mode 100644 gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/backscatter.wgsl create mode 100644 gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/convert.wgsl create mode 100644 gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/fft.wgsl create mode 100644 gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/matmul.wgsl diff --git a/dave_interfaces/CMakeLists.txt b/dave_interfaces/CMakeLists.txt index 36eda427..56276f13 100644 --- a/dave_interfaces/CMakeLists.txt +++ b/dave_interfaces/CMakeLists.txt @@ -11,8 +11,6 @@ find_package(rclcpp REQUIRED) find_package(std_msgs REQUIRED) find_package(geometry_msgs REQUIRED) find_package(rosidl_default_generators REQUIRED) -find_package(gz-cmake3 REQUIRED) -find_package(gz-msgs10 REQUIRED) rosidl_generate_interfaces(${PROJECT_NAME} "msg/UsblCommand.msg" diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/CMakeLists.txt b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/CMakeLists.txt index c5a1d2d0..c02f0708 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/CMakeLists.txt +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/CMakeLists.txt @@ -11,113 +11,104 @@ cmake_policy(SET CMP0144 NEW) project(multibeam_sonar) -set(CUDA_ARCHITECTURE "60" CACHE STRING "Target CUDA SM version") - find_package(ament_cmake REQUIRED) -find_package(CUDAToolkit QUIET) - -if(CUDAToolkit_FOUND) - - enable_language(CUDA) - find_package(CUDA REQUIRED) - message(STATUS "CUDA found, enabling CUDA support.") - include_directories(${CUDA_INCLUDE_DIRS}) - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -arch=sm_${CUDA_ARCHITECTURE}") - find_package(gz-cmake3 REQUIRED) - find_package(gz-sim8 REQUIRED) - find_package(gz-sensors8 REQUIRED) - find_package(gz-rendering8 REQUIRED OPTIONAL_COMPONENTS ogre ogre2) - find_package(gz-transport13 REQUIRED) - find_package(rclcpp REQUIRED) - find_package(sensor_msgs REQUIRED) - find_package(geometry_msgs REQUIRED) - find_package(rosidl_default_generators REQUIRED) - find_package(OpenCV REQUIRED) - find_package(marine_acoustic_msgs REQUIRED) - find_package(cv_bridge REQUIRED) - - set(GZ_MSGS_VER ${gz-msgs10_VERSION_MAJOR}) - set(GZ_RENDERING_VER ${gz-rendering8_VERSION_MAJOR}) - set(GZ_SENSORS_VER ${gz-sensors8_VERSION_MAJOR}) - set(GZ_TRANSPORT_VER ${gz-transport13_VERSION_MAJOR}) - set(GZ_SIM_VER ${gz-sim8_VERSION_MAJOR}) - - if(TARGET gz-rendering${GZ_RENDERING_VER}::ogre) - set(HAVE_OGRE TRUE) - set(GZ_RENDERING_TARGET gz-rendering${GZ_RENDERING_VER}-ogre) - add_definitions(-DWITH_OGRE) - endif() +find_package(gz-cmake REQUIRED) +find_package(gz-sim REQUIRED) +find_package(gz-sensors REQUIRED) +find_package(gz-rendering REQUIRED OPTIONAL_COMPONENTS ogre ogre2) +find_package(gz-transport REQUIRED) +find_package(gz-msgs REQUIRED) +find_package(rclcpp REQUIRED) +find_package(sensor_msgs REQUIRED) +find_package(geometry_msgs REQUIRED) +find_package(OpenCV REQUIRED) +find_package(marine_acoustic_msgs REQUIRED) +find_package(cv_bridge REQUIRED) +find_package(wgpu_vendor REQUIRED) + +if(TARGET gz-rendering::ogre) + add_definitions(-DWITH_OGRE) +endif() +if(TARGET gz-rendering::ogre2) + add_definitions(-DWITH_OGRE2) +endif() - if(TARGET gz-rendering${GZ_RENDERING_VER}::ogre2) - set(HAVE_OGRE2 TRUE) - set(GZ_RENDERING_TARGET gz-rendering${GZ_RENDERING_VER}-ogre2) - add_definitions(-DWITH_OGRE2) - endif() +add_library(${PROJECT_NAME} SHARED + MultibeamSonarSensor.cc + sonar_compute_cpu.cc + sonar_compute_wgpu.cc +) + +target_include_directories(${PROJECT_NAME} PUBLIC + ${gz-sensors_INCLUDE_DIRS} + ${gz-rendering_INCLUDE_DIRS} + ${gz-msgs_INCLUDE_DIRS} + ${gz-transport_INCLUDE_DIRS} + ${gz-sim_INCLUDE_DIRS} + ${rclcpp_INCLUDE_DIRS} + ${sensor_msgs_INCLUDE_DIRS} + ${geometry_msgs_INCLUDE_DIRS} + ${OpenCV_INCLUDE_DIRS} + ${marine_acoustic_msgs_INCLUDE_DIRS} + ${cv_bridge_INCLUDE_DIRS} +) + +if(DEFINED ENV{ROS_DISTRO} AND EXISTS "/opt/ros/$ENV{ROS_DISTRO}/include/cv_bridge") + target_include_directories(${PROJECT_NAME} PUBLIC "/opt/ros/$ENV{ROS_DISTRO}/include/cv_bridge") +elseif(EXISTS "/opt/ros/rolling/include/cv_bridge") + target_include_directories(${PROJECT_NAME} PUBLIC "/opt/ros/rolling/include/cv_bridge") +endif() + +target_link_libraries(${PROJECT_NAME} + gz-sim::gz-sim + gz-sensors::gz-sensors + gz-msgs::gz-msgs + gz-transport::gz-transport + gz-rendering::gz-rendering + ${rclcpp_LIBRARIES} + ${sensor_msgs_LIBRARIES} + ${geometry_msgs_LIBRARIES} + ${OpenCV_LIBS} + ${marine_acoustic_msgs_LIBRARIES} +) + +if(TARGET wgpu::wgpu_vendor) + target_link_libraries(${PROJECT_NAME} wgpu::wgpu_vendor) +elseif(TARGET wgpu_vendor) + target_link_libraries(${PROJECT_NAME} wgpu_vendor) +endif() - add_library(${PROJECT_NAME} - SHARED - MultibeamSonarSensor.cc - sonar_calculation_cuda.cu - ) - - set_target_properties(${PROJECT_NAME} - PROPERTIES - CUDA_SEPARABLE_COMPILATION ON - ) - - target_include_directories(${PROJECT_NAME} - PUBLIC - ${gz-sensors${GZ_SENSORS_VER}_INCLUDE_DIRS} - ${gz-rendering${GZ_RENDERING_VER}_INCLUDE_DIRS} - ${gz-msgs${GZ_MSGS_VER}_INCLUDE_DIRS} - ${CUDA_INCLUDE_DIRS} - ${gz-transport${GZ_TRANSPORT_VER}_INCLUDE_DIRS} - ${gz-sim${GZ_SIM_VER}_INCLUDE_DIRS} - ) - - find_library(CUBLAS_LIB cublas - HINTS - ${CUDAToolkit_LIBRARY_DIR} - /usr/local/cuda/lib64 - /usr/lib/x86_64-linux-gnu - /usr/lib - ) - if(NOT CUBLAS_LIB) - message(FATAL_ERROR "cuBLAS not found") +# Ensure the Rust static archive is linked into this shared library. +# The exported interface target may not always propagate non-installed imported +# targets across package boundaries in overlay builds. +if(DEFINED wgpu_vendor_DIR) + get_filename_component(_wgpu_vendor_prefix "${wgpu_vendor_DIR}/../../.." ABSOLUTE) + set(_sonar_wgpu_static "${_wgpu_vendor_prefix}/lib/libsonar_wgpu.a") + if(EXISTS "${_sonar_wgpu_static}") + target_link_libraries(${PROJECT_NAME} "${_sonar_wgpu_static}") endif() +endif() - target_link_libraries(${PROJECT_NAME} - ${CUDA_LIBRARIES} - ${CUDA_CUFFT_LIBRARIES} - ${CUBLAS_LIB} - ) - - install(TARGETS ${PROJECT_NAME} - DESTINATION lib/${PROJECT_NAME} - ) - - ament_target_dependencies(${PROJECT_NAME} - gz-sensors${GZ_SENSORS_VER} - ${GZ_RENDERING_TARGET} - gz-msgs${GZ_MSGS_VER} - gz-transport${GZ_TRANSPORT_VER} - gz-sim${GZ_SIM_VER} - rclcpp - sensor_msgs - rosidl_default_generators - OpenCV - marine_acoustic_msgs - geometry_msgs - cv_bridge - ) - - ament_environment_hooks( - "${CMAKE_CURRENT_SOURCE_DIR}/hooks/${PROJECT_NAME}.dsv.in" - ) +if(TARGET cv_bridge::cv_bridge) + target_link_libraries(${PROJECT_NAME} cv_bridge::cv_bridge) +elseif(TARGET cv_bridge) + target_link_libraries(${PROJECT_NAME} cv_bridge) +else() + target_link_libraries(${PROJECT_NAME} ${cv_bridge_LIBRARIES}) +endif() +if(TARGET gz-sensors::gz-sensors-rendering) + target_link_libraries(${PROJECT_NAME} gz-sensors::gz-sensors-rendering) else() - message(STATUS "CUDA Toolkit not found or disabled: " - "Skipping CUDA-specific targets") + find_library(GZ_SENSORS_RENDERING_LIB gz-sensors-rendering) + if(GZ_SENSORS_RENDERING_LIB) + target_link_libraries(${PROJECT_NAME} ${GZ_SENSORS_RENDERING_LIB}) + endif() endif() +install(TARGETS ${PROJECT_NAME} DESTINATION lib/${PROJECT_NAME}) + +ament_environment_hooks( + "${CMAKE_CURRENT_SOURCE_DIR}/hooks/${PROJECT_NAME}.dsv.in") + ament_package() diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/MultibeamSonarSensor.cc b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/MultibeamSonarSensor.cc index 2840e20a..27841226 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/MultibeamSonarSensor.cc +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/MultibeamSonarSensor.cc @@ -46,9 +46,9 @@ #include #include #include "MultibeamSonarSensor.hh" -#include "sonar_calculation_cuda.cuh" #include +#include #include #include #include @@ -142,7 +142,7 @@ class InMemoryTimeVaryingVectorField } if (this->zData && this->zSession) { - const auto interpolation = this->zData->LookUp(this->ySession.value(), _pos); + const auto interpolation = this->zData->LookUp(this->zSession.value(), _pos); outcome.Z(interpolation.value_or(0.)); } return outcome; @@ -183,6 +183,21 @@ MultibeamSonarSensor::MultibeamSonarSensor() : dataPtr(new Implementation()) {} ////////////////////////////////////////////////// MultibeamSonarSensor::~MultibeamSonarSensor() { + // Stop the background compute thread before tearing down sensors. + { + std::lock_guard lk(this->dataPtr->computeMutex_); + this->dataPtr->stopThread_ = true; + } + this->dataPtr->computeCV_.notify_one(); + if (this->dataPtr->computeThread_.joinable()) + { + this->dataPtr->computeThread_.join(); + } + + if (this->dataPtr->ros_executor_ && this->dataPtr->ros_node_) + { + this->dataPtr->ros_executor_->remove_node(this->dataPtr->ros_node_); + } this->dataPtr->rayConnection.reset(); this->dataPtr->sceneChangeConnection.reset(); // CSV log write stream close @@ -286,6 +301,24 @@ bool MultibeamSonarSensor::Implementation::Initialize(MultibeamSonarSensor * _se this->referenceFrameRotation = referenceFrameTransform.Rot().Inverse(); + // Spawn background sonar compute thread now that all setup is complete. + this->computeThread_ = std::thread( + [this]() + { + while (true) + { + std::unique_lock lk(this->computeMutex_); + this->computeCV_.wait(lk, [this] { return this->newFrameReady_ || this->stopThread_; }); + if (this->stopThread_) + { + break; + } + this->newFrameReady_ = false; + lk.unlock(); + this->ComputeSonarImage(); + } + }); + gzmsg << "Initialized [" << _sensor->Name() << "] sensor." << std::endl; this->initialized = true; return true; @@ -383,18 +416,31 @@ bool MultibeamSonarSensor::Implementation::InitializeBeamArrangement(MultibeamSo sensorElement->Get("sonarImageTopicName", "sonar_image").first; gzmsg << "sonarImageTopicName: " << this->sonarImageTopicName << std::endl; + // NOTE: frameName is the TF frame name for published sonar messages (frame_id in ROS headers). + // This is explicitly configured in SDF rather than derived from Gazebo's + // internal sensor frame ID. Must match TF tree for RViz transforms to work correctly. + // Verify in SDF files that matches the expected TF frame name. this->frameName = sensorElement->Get("frameName", "forward_sonar_optical_link").first; gzmsg << "frameName: " << this->frameName << std::endl; - this->frameId = _sensor->FrameId(); - size_t pos = 0; - while ((pos = this->frameId.find("::", pos)) != std::string::npos) + if (const char * backendEnv = std::getenv("DAVE_SONAR_COMPUTE_BACKEND")) + { + this->requestedBackend = backendEnv; + } + else + { + this->requestedBackend = "auto"; + } + + this->computeBackend = CreateComputeBackend(this->requestedBackend); + if (!this->computeBackend) { - this->frameId.replace(pos, 2, "/"); - pos += 1; + gzerr << "Unable to initialize requested sonar backend [" << this->requestedBackend << "]." + << std::endl; + return false; } - gzmsg << "frameId: " << this->frameId << std::endl; + gzmsg << "Using sonar compute backend: " << this->computeBackend->Name() << std::endl; // ROS Initialization @@ -404,7 +450,8 @@ bool MultibeamSonarSensor::Implementation::InitializeBeamArrangement(MultibeamSo } this->ros_node_ = std::make_shared("multibeam_sonar_node"); - this->ros_node_->set_parameters({rclcpp::Parameter("use_sim_time", true)}); + this->ros_executor_ = std::make_shared(); + this->ros_executor_->add_node(this->ros_node_); // Create the point cloud publisher this->pointPub = this->node.Advertise( @@ -689,6 +736,36 @@ bool MultibeamSonarSensor::Implementation::InitializeBeamArrangement(MultibeamSo this->constMu = true; this->mu = 1e-3; + SonarComputeInput prototype; + prototype.depthImage = &this->pointCloudImage; + prototype.nBeams = this->nBeams; + prototype.nRays = this->nRays; + prototype.nFreq = this->nFreq; + prototype.raySkips = this->raySkips; + prototype.hFOV = this->hFOV; + prototype.vFOV = this->vFOV; + prototype.maxDistance = this->maxDistance; + prototype.soundSpeed = this->soundSpeed; + prototype.attenuation = this->attenuation; + prototype.sensorGain = this->sensorGain; + prototype.blazingFlag = this->blazingFlag; + prototype.window = this->window; + prototype.rangeVector = this->rangeVector; + prototype.beamCorrector = this->beamCorrector; + prototype.beamCorrectorSum = this->beamCorrectorSum; + prototype.sourceLevel = this->sourceLevel; + prototype.bandwidth = this->bandwidth; + prototype.sonarFreq = this->sonarFreq; + prototype.elevation_angles.assign(this->elevation_angles, this->elevation_angles + this->nRays); + prototype.seed = this->sonarSeed; + + if (!this->computeBackend->Initialize(prototype)) + { + gzerr << "Failed to initialize sonar backend [" << this->computeBackend->Name() << "]." + << std::endl; + return false; + } + return true; } @@ -724,9 +801,14 @@ void MultibeamSonarSensor::Implementation::OnNewFrame( // Fill point cloud with the ray buffer this->FillPointCloudMsg(this->rayBuffer); + // Signals the background compute thread to process the new frame if (this->pointCloudImage.size().width != 0) { - this->ComputeSonarImage(); + { + std::lock_guard lk(this->computeMutex_); + this->newFrameReady_ = true; + } + this->computeCV_.notify_one(); } } @@ -805,7 +887,10 @@ void MultibeamSonarSensor::PostUpdate(const std::chrono::steady_clock::duration << "cannot estimate velocities." << std::endl; return; } - rclcpp::spin_some(this->dataPtr->ros_node_); + if (this->dataPtr->ros_executor_) + { + this->dataPtr->ros_executor_->spin_some(); + } if (this->dataPtr->publishingPointCloud) { @@ -998,9 +1083,11 @@ cv::Mat MultibeamSonarSensor::Implementation::ComputeNormalImage(cv::Mat & depth images.at(0) = n1; // for green channel images.at(1) = n2; // for red channel - // Calculate focal length (?) Not sure if this is the right way; - // I could not find in the original DAVE the definition of the focal length - double focal_length = (0.5 * this->pointMsg.width()) / tan(0.5 * this->hFOV); + // Calculate focal length using the actual depth image dimensions (cols) so + // this function is safe to call from a background thread without accessing + // the shared pointMsg field. Avoid data race with pointMsg.width() which may + // be being written to by the render thread concurrently. + double focal_length = (0.5 * depth.cols) / tan(0.5 * this->hFOV); images.at(2) = 1.0 / focal_length * depth; // for blue channel cv::Mat normal_image; @@ -1043,21 +1130,51 @@ void MultibeamSonarSensor::Implementation::ComputeCorrector() void MultibeamSonarSensor::Implementation::ComputeSonarImage() { - this->lock_.lock(); - cv::Mat normal_image = this->ComputeNormalImage(this->pointCloudImage); - double vPixelSize = this->vFOV / (this->pointMsg.height() - 1); - double hPixelSize = this->hFOV / (this->pointMsg.width() - 1); - - if (this->beamCorrectorSum == 0) + // Snapshot the depth image quickly, then release the lock so the render + // thread can fill the next frame without waiting for GPU compute. + cv::Mat depthSnapshot; + // Capture the timestamp and image dimensions while holding the lock so that + // all published messages share one coherent stamp matching when the data was + // recorded -- mismatched stamps are the primary cause of RViz flicker. + rclcpp::Time capturedStamp; { - ComputeCorrector(); - } + std::lock_guard snapshot_lk(this->lock_); + if (this->pointCloudImage.empty()) + { + return; + } + depthSnapshot = this->pointCloudImage.clone(); + capturedStamp = this->ros_node_->now(); - if (this->reflectivityImage.rows == 0) - { - this->reflectivityImage = - cv::Mat(this->pointMsg.width(), this->pointMsg.height(), CV_32FC1, cv::Scalar(this->mu)); + if (this->beamCorrectorSum == 0) + { + ComputeCorrector(); + } + + if (this->reflectivityImage.rows == 0) + { + this->reflectivityImage = + cv::Mat(depthSnapshot.rows, depthSnapshot.cols, CV_32FC1, cv::Scalar(this->mu)); + } } + // lock_ released -> render thread can proceed immediately. + + // NOTE: reflectivityImage is used here without a lock. + // This is safe for now because only the compute thread writes to it + // and the render thread only reads it. If the render thread ever starts modifying + // reflectivityImage, this will cause a data race. + // TODO: Either guarantee that reflectivityImage stays read-only + // after initialization, or add proper locking around the compute step. + + // Use snapshot dimensions from here on to avoid racing with the render thread + // that may be writing pointMsg concurrently. + const int snapWidth = depthSnapshot.cols; + const int snapHeight = depthSnapshot.rows; + + double vPixelSize = this->vFOV / (snapHeight - 1); + double hPixelSize = this->hFOV / (snapWidth - 1); + + cv::Mat normal_image = this->ComputeNormalImage(depthSnapshot); auto start = std::chrono::high_resolution_clock::now(); @@ -1065,35 +1182,49 @@ void MultibeamSonarSensor::Implementation::ComputeSonarImage() // -------- Sonar calculations -------- // // ------------------------------------------------// - CArray2D P_Beams = NpsGazeboSonar::sonar_calculation_wrapper( - this->pointCloudImage, // cv::Mat& depth_image (the point cloud image) - normal_image, // cv::Mat& normal_image - hPixelSize, // hPixelSize - vPixelSize, // vPixelSize - hFOV, // hFOV - vFOV, // VFOV - hPixelSize, // _beam_azimuthAngleWidth - verticalFOV / 180 * M_PI, // _beam_elevationAngleWidth - hPixelSize, // _ray_azimuthAngleWidth - this->elevation_angles, // _ray_elevationAngles - vPixelSize * (raySkips + 1), // _ray_elevationAngleWidth - this->soundSpeed, // _soundSpeed - this->maxDistance, // _maxDistance - this->sourceLevel, // _sourceLevel - this->nBeams, // _nBeams - this->nRays, // _nRays - this->raySkips, // _raySkips - this->sonarFreq, // _sonarFreq - this->bandwidth, // _bandwidth - this->nFreq, // _nFreq - this->reflectivityImage, // reflectivity_image - this->attenuation, // _attenuation - this->window, // _window - this->beamCorrector, // _beamCorrector - this->beamCorrectorSum, // _beamCorrectorSum - this->debugFlag, // debugFlag - this->blazingFlag // _blazingFlag - ); + SonarComputeInput input; + input.depthImage = &depthSnapshot; + input.normalImage = &normal_image; + input.reflectivityImage = &this->reflectivityImage; + input.nBeams = this->nBeams; + input.nRays = this->nRays; + input.nFreq = this->nFreq; + input.raySkips = this->raySkips; + input.hFOV = this->hFOV; + input.vFOV = this->vFOV; + input.maxDistance = this->maxDistance; + input.soundSpeed = this->soundSpeed; + input.attenuation = this->attenuation; + input.sensorGain = this->sensorGain; + input.blazingFlag = this->blazingFlag; + input.window = this->window; + input.rangeVector = this->rangeVector; + input.beamCorrector = this->beamCorrector; + input.beamCorrectorSum = this->beamCorrectorSum; + input.sourceLevel = this->sourceLevel; + input.bandwidth = this->bandwidth; + input.sonarFreq = this->sonarFreq; + input.elevation_angles.assign(this->elevation_angles, this->elevation_angles + this->nRays); + input.frameIndex = this->frameCounter++; + input.seed = this->sonarSeed; + + SonarComputeOutput backendOutput; + if (!this->computeBackend->Compute(input, backendOutput)) + { + RCLCPP_ERROR_STREAM( + this->ros_node_->get_logger(), + "Sonar compute backend failed: " << this->computeBackend->Name()); + return; + } + + CArray2D P_Beams(CArray(this->nFreq), this->nBeams); + for (int beam = 0; beam < this->nBeams; ++beam) + { + for (int freq = 0; freq < this->nFreq; ++freq) + { + P_Beams[beam][freq] = backendOutput.At(beam, freq); + } + } // For calc time measure auto stop = std::chrono::high_resolution_clock::now(); @@ -1102,8 +1233,9 @@ void MultibeamSonarSensor::Implementation::ComputeSonarImage() if (debugFlag) { RCLCPP_INFO_STREAM( - this->ros_node_->get_logger(), - "GPU Sonar Frame Calc Time " << duration.count() / 10000 << "/100 [s]\n"); + this->ros_node_->get_logger(), this->computeBackend->Name() + << " Sonar Frame Calc Time " << duration.count() / 10000 + << "/100 [s]\n"); } // CSV log write stream @@ -1164,13 +1296,22 @@ void MultibeamSonarSensor::Implementation::ComputeSonarImage() } } - rclcpp::Time now = this->ros_node_->now(); + // Use the single timestamp captured at snapshot time for all publications. + // This ensures sonarRawDataMsg, sonarImgMsg, and normalImgMsg all carry the + // same stamp - the sim time when the depth frame was actually recorded - + // so RViz TF lookups succeed and images don't flicker. - this->sonarRawDataMsg.header.frame_id = this->frameId; + // FEEDBACK_NEEDED/TODO: Using frameName (from SDF tag) as the ROS frame_id. + // This is user-friendly but requires the SDF value to match an actual TF frame exactly. + // The old approach used frameId (Gazebo's internal sensor ID) which was more likely + // to match the TF tree automatically. + // If TF lookup errors appear in RViz, shall revert back to frameId. + + this->sonarRawDataMsg.header.frame_id = this->frameName; - this->sonarRawDataMsg.header.stamp.sec = static_cast(now.seconds()); + this->sonarRawDataMsg.header.stamp.sec = static_cast(capturedStamp.seconds()); this->sonarRawDataMsg.header.stamp.nanosec = - static_cast(now.nanoseconds() % 1000000000); + static_cast(capturedStamp.nanoseconds() % 1000000000); marine_acoustic_msgs::msg::PingInfo ping_info_msg_; @@ -1308,10 +1449,10 @@ void MultibeamSonarSensor::Implementation::ComputeSonarImage() cv::Mat Itensity_image_color; cv::applyColorMap(Intensity_image, Itensity_image_color, cv::COLORMAP_HOT); - now = this->ros_node_->now(); this->sonarImgMsg.header.frame_id = this->frameName; - this->sonarImgMsg.header.stamp.sec = static_cast(now.seconds()); - this->sonarImgMsg.header.stamp.nanosec = static_cast(now.nanoseconds() % 1000000000); + this->sonarImgMsg.header.stamp.sec = static_cast(capturedStamp.seconds()); + this->sonarImgMsg.header.stamp.nanosec = + static_cast(capturedStamp.nanoseconds() % 1000000000); img_bridge = cv_bridge::CvImage( this->sonarImgMsg.header, sensor_msgs::image_encodings::BGR8, Itensity_image_color); @@ -1319,12 +1460,10 @@ void MultibeamSonarSensor::Implementation::ComputeSonarImage() img_bridge.toImageMsg(this->sonarImgMsg); this->sonarImagePub->publish(this->sonarImgMsg); - // ---------------------------------------- End of sonar calculation - - now = this->ros_node_->now(); this->normalImgMsg.header.frame_id = this->frameName; - this->normalImgMsg.header.stamp.sec = static_cast(now.seconds()); - this->normalImgMsg.header.stamp.nanosec = static_cast(now.nanoseconds() % 1000000000); + this->normalImgMsg.header.stamp.sec = static_cast(capturedStamp.seconds()); + this->normalImgMsg.header.stamp.nanosec = + static_cast(capturedStamp.nanoseconds() % 1000000000); cv::Mat normal_image8; normal_image.convertTo(normal_image8, CV_8UC3, 255.0); @@ -1334,7 +1473,7 @@ void MultibeamSonarSensor::Implementation::ComputeSonarImage() // from cv_bridge to sensor_msgs::Image this->normalImagePub->publish(this->normalImgMsg); - this->lock_.unlock(); + // ---------------------------------------- End of sonar calculation } } // namespace sensors diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/MultibeamSonarSensor.hh b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/MultibeamSonarSensor.hh index 3138bd10..7e3b65d2 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/MultibeamSonarSensor.hh +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/MultibeamSonarSensor.hh @@ -3,9 +3,11 @@ #include #include +#include #include #include #include +#include #include #include #include @@ -24,6 +26,7 @@ #include #include "AcousticBeam.hh" #include "AxisAlignedPatch2.hh" +#include "sonar_compute_backend.hh" namespace gz { @@ -108,8 +111,16 @@ private: GZ_UTILS_WARN_IGNORE__DLL_INTERFACE_MISSING mutable std::mutex rayMutex; GZ_UTILS_WARN_RESUME__DLL_INTERFACE_MISSING + + // Background sonar compute thread and synchronization + std::thread computeThread_; + std::condition_variable computeCV_; + std::mutex computeMutex_; + bool newFrameReady_{false}; + bool stopThread_{false}; // ROS node pointer std::shared_ptr ros_node_; + std::shared_ptr ros_executor_; // SDF sensor element sdf::ElementPtr sensorSdf; @@ -153,7 +164,6 @@ private: std::string sonarImageRawTopicName; std::string sonarImageTopicName; std::string frameName; - std::string frameId; // for non-optical frame id from sensor // Ray parameters int raySkips; @@ -169,6 +179,12 @@ private: bool constMu; double mu; + // Backend selection and determinism controls. + std::string requestedBackend{"auto"}; + uint64_t sonarSeed{12345}; + uint64_t frameCounter{0}; + std::unique_ptr computeBackend; + // Beam corrector float beamCorrectorSum; float ** beamCorrector; diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/package.xml b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/package.xml index 593fd5de..5e632334 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/package.xml +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/package.xml @@ -7,7 +7,7 @@ Helena Moyen Gaurav kumar Apache 2.0 - ament_cmake + ament_cmake rclcpp sensor_msgs geometry_msgs @@ -17,6 +17,7 @@ pcl_conversions dave_interfaces protobuf + wgpu_vendor ament_lint_auto ament_cmake diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_backend.hh b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_backend.hh new file mode 100644 index 00000000..b0750763 --- /dev/null +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_backend.hh @@ -0,0 +1,83 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace gz +{ +namespace sensors +{ + +struct SonarComputeInput +{ + const cv::Mat * depthImage{nullptr}; + const cv::Mat * normalImage{nullptr}; + const cv::Mat * reflectivityImage{nullptr}; + + int nBeams{0}; + int nRays{0}; + int nFreq{0}; + int raySkips{1}; + + double hFOV{0.0}; + double vFOV{0.0}; + double maxDistance{0.0}; + double soundSpeed{1500.0}; + double attenuation{0.0}; + + float sensorGain{0.02f}; + bool blazingFlag{false}; + + const float * window{nullptr}; + const float * rangeVector{nullptr}; + float ** beamCorrector{nullptr}; + float beamCorrectorSum{0.0f}; + + double sourceLevel{220.0}; + double bandwidth{29.5e6}; + double sonarFreq{900e3}; + std::vector elevation_angles; + + uint64_t frameIndex{0}; + uint64_t seed{0}; +}; + +struct SonarComputeOutput +{ + int nBeams{0}; + int nFreq{0}; + std::vector> beamSpectrum; + double computeMicros{0.0}; + + std::complex & At(int beam, int freq) + { + return this->beamSpectrum[static_cast(beam) * this->nFreq + freq]; + } + + const std::complex & At(int beam, int freq) const + { + return this->beamSpectrum[static_cast(beam) * this->nFreq + freq]; + } +}; + +class ComputeBackend +{ +public: + virtual ~ComputeBackend() = default; + + virtual const char * Name() const = 0; + + virtual bool Initialize(const SonarComputeInput & prototype) = 0; + + virtual bool Compute(const SonarComputeInput & input, SonarComputeOutput & output) = 0; +}; + +std::unique_ptr CreateComputeBackend(const std::string & requestedBackend); + +} // namespace sensors +} // namespace gz diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cpu.cc b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cpu.cc new file mode 100644 index 00000000..b997c187 --- /dev/null +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cpu.cc @@ -0,0 +1,179 @@ +#include "sonar_compute_backend.hh" + +#include +#include +#include + +// NOTE: This file serves two responsibilities -- it implements the CPU compute +// backend (CpuComputeBackend) AND hosts the CreateComputeBackend() factory +// that selects between CPU and WGPU at runtime. Because the factory lives here, +// this file must include sonar_compute_wgpu.hh, which creates a hard compile-time +// dependency on the WGPU backend even when only CPU support is needed. +// +// TODO: Move CreateComputeBackend() into its own file (e.g. sonar_compute_factory.cc) +// so that the CPU and WGPU backends are fully decoupled and the factory is the +// only translation unit that needs to know about both. +#include "sonar_compute_wgpu.hh" + +namespace gz +{ +namespace sensors +{ +namespace +{ + +class CpuComputeBackend : public ComputeBackend +{ +public: + const char * Name() const override { return "cpu"; } + + bool Initialize(const SonarComputeInput &) override { return true; } + + bool Compute(const SonarComputeInput & input, SonarComputeOutput & output) override + { + if (!input.depthImage || !input.normalImage || input.nBeams <= 0 || input.nFreq <= 0) + { + return false; + } + + auto start = std::chrono::high_resolution_clock::now(); + + output.nBeams = input.nBeams; + output.nFreq = input.nFreq; + output.beamSpectrum.assign( + static_cast(input.nBeams) * static_cast(input.nFreq), + std::complex(0.0f, 0.0f)); + + const cv::Mat & depth = *input.depthImage; + const cv::Mat & normal = *input.normalImage; + + if (depth.empty() || depth.type() != CV_32FC1 || normal.empty() || normal.type() != CV_32FC3) + { + return false; + } + + const int rows = depth.rows; + const int cols = depth.cols; + const float maxDistance = static_cast(std::max(1e-6, input.maxDistance)); + const float attenuation = static_cast(input.attenuation); + + const float source_term = + std::sqrt(std::pow(10.0f, static_cast(input.sourceLevel) / 10.0f)) * 1e-6f; + const float h_pixel = static_cast(input.hFOV) / std::max(1, input.nBeams - 1); + const float v_pixel = static_cast(input.vFOV) / std::max(1, input.nRays - 1); + const float area_scaler = h_pixel * v_pixel * static_cast(input.raySkips + 1); + + for (int r = 0; r < rows; ++r) + { + const float * depthRow = depth.ptr(r); + const cv::Vec3f * normalRow = normal.ptr(r); + for (int c = 0; c < cols; ++c) + { + const float d = depthRow[c]; + if (!std::isfinite(d) || d <= 0.0f || d > maxDistance) + { + continue; + } + + const int beam = std::min(input.nBeams - 1, (c * input.nBeams) / std::max(1, cols)); + const int bin = + std::min(input.nFreq - 1, static_cast((d / maxDistance) * input.nFreq)); + + const float nz = std::max(0.0f, normalRow[c][2]); + float reflectivity = 1.0f; + if (input.reflectivityImage && !input.reflectivityImage->empty()) + { + reflectivity = input.reflectivityImage->at(r, c); + } + + // CUDA-aligned physics: spread + lambert + stochastic phase term + const float lambert = std::sqrt(nz); // sqrt(cos(incidence)); nz == cos(acos(nz)) + const float spread = 1.0f / std::max(1e-6f, d * d); + const float target_area = std::sqrt(d * area_scaler); + const float amplitude_mag = input.sensorGain * source_term * spread * + std::exp(-2.0f * attenuation * d) * lambert * + std::sqrt(reflectivity) * target_area; + + const float phase_noise = static_cast((beam * input.nRays + r) % input.nFreq) / + static_cast(input.nFreq) * 2.0f * static_cast(M_PI); + output.At(beam, bin) += std::complex( + amplitude_mag * std::cos(phase_noise) / std::sqrt(2.0f), + amplitude_mag * std::sin(phase_noise) / std::sqrt(2.0f)); + } + } + + // Apply the configured range window. + if (input.window) + { + for (int b = 0; b < input.nBeams; ++b) + { + for (int f = 0; f < input.nFreq; ++f) + { + output.At(b, f) *= input.window[f]; + } + } + } + + // Apply beam correction matrix when available. + if (input.beamCorrector && input.beamCorrectorSum > 0.0f) + { + std::vector> corrected = output.beamSpectrum; + const float norm = input.beamCorrectorSum; + for (int b = 0; b < input.nBeams; ++b) + { + for (int f = 0; f < input.nFreq; ++f) + { + std::complex accum{0.0f, 0.0f}; + for (int k = 0; k < input.nBeams; ++k) + { + const float w = input.beamCorrector[b][k] / norm; + accum += output.At(k, f) * w; + } + corrected[static_cast(b) * input.nFreq + f] = accum; + } + } + output.beamSpectrum.swap(corrected); + } + + const auto stop = std::chrono::high_resolution_clock::now(); + output.computeMicros = + std::chrono::duration_cast(stop - start).count(); + return true; + } +}; + +} // namespace + +std::unique_ptr CreateComputeBackend(const std::string & requestedBackend) +{ + std::string backend = requestedBackend; + std::transform( + backend.begin(), backend.end(), backend.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + + if (backend == "wgpu") + { + auto wgpuBackend = std::make_unique(); + SonarComputeInput probe; + if (wgpuBackend->Initialize(probe)) + { + return wgpuBackend; + } + return nullptr; + } + + if (backend == "auto") + { + auto wgpuBackend = std::make_unique(); + SonarComputeInput probe; + if (wgpuBackend->Initialize(probe)) + { + return wgpuBackend; + } + } + + return std::make_unique(); +} + +} // namespace sensors +} // namespace gz diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_wgpu.cc b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_wgpu.cc new file mode 100644 index 00000000..698b1fd1 --- /dev/null +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_wgpu.cc @@ -0,0 +1,214 @@ +#include "sonar_compute_wgpu.hh" + +#include +#include +#include +#include +#include +#include + +extern "C" +{ + float * sonar_wgpu_compute( + const float * depth_flat, const float * normal_flat, const float * reflectivity_flat, + const float * beam_corrector_flat, const float * window_flat, uint32_t n_beams, uint32_t n_rays, + uint32_t n_freq, uint32_t ray_skips, float sound_speed, float max_distance, float source_level, + float attenuation, float sensor_gain, float bandwidth, float beam_corrector_sum, + float area_scaler, float h_fov, float v_fov, uint64_t frame_index, uint64_t seed); + + void sonar_wgpu_free(float * ptr, size_t len); +} + +namespace gz +{ +namespace sensors +{ + +const char * WgpuComputeBackend::Name() const { return "wgpu"; } + +bool WgpuComputeBackend::Initialize(const SonarComputeInput &) +{ + // Probe the Rust FFI to check if GPU is available. + // If probe returns null, GPU is unavailable -> we'll fall back to CPU in Compute(). + std::vector depth(1, 0.0f); + std::vector normal(3, 0.0f); + std::vector reflectivity(1, 0.0f); + std::vector beamCorrector(1, 1.0f); + std::vector window(4, 1.0f); + + float * probe = sonar_wgpu_compute( + depth.data(), normal.data(), reflectivity.data(), beamCorrector.data(), window.data(), 1u, 1u, + 4u, 0u, 1500.0f, 10.0f, 220.0f, 0.0f, 1.0f, 29500000.0f, 1.0f, 1e-6f, 0.0f, 0.0f, 0u, 1u); + + if (probe) + { + sonar_wgpu_free(probe, 1u * 4u * 2u); + this->gpuAvailable = true; + } + else + { + this->gpuAvailable = false; + std::cerr << "[sonar_wgpu] GPU probe failed -> will fall back to CPU at compute time." + << std::endl; + } + + this->initialized = true; + return true; +} + +bool WgpuComputeBackend::Compute(const SonarComputeInput & input, SonarComputeOutput & output) +{ + if (!this->initialized) + { + return false; + } + + // If GPU is unavailable, go directly to CPU fallback + if (!this->gpuAvailable) + { + if (!this->cpuFallback) + { + this->cpuFallback = CreateComputeBackend("cpu"); + if (this->cpuFallback) + { + this->cpuFallback->Initialize(input); + } + } + if (this->cpuFallback) + { + return this->cpuFallback->Compute(input, output); + } + return false; + } + + if (!input.depthImage || !input.normalImage || !input.reflectivityImage) + { + return false; + } + + const cv::Mat & depth = *input.depthImage; + const cv::Mat & normal = *input.normalImage; + const cv::Mat & reflectivity = *input.reflectivityImage; + + if (depth.empty() || normal.empty() || reflectivity.empty()) + { + return false; + } + + const int nBeams = input.nBeams; + const int nRays = input.nRays; + const int nFreq = input.nFreq; + + if (nBeams <= 0 || nRays <= 0 || nFreq <= 0) + { + return false; + } + + auto start = std::chrono::high_resolution_clock::now(); + + // Compute area_scaler matching CUDA: ray_azimuthAngleWidth * ray_elevationAngleWidth + // where ray_azimuthAngleWidth = hPixelSize + // ray_elevationAngleWidth = vPixelSize * (raySkips + 1) + const float hPixelSize = static_cast(input.hFOV) / std::max(1, nBeams - 1); + const float vPixelSize = static_cast(input.vFOV) / std::max(1, nRays - 1); + + const int raySkipsFactor = std::max(1, input.raySkips); + const float area_scaler = hPixelSize * vPixelSize * raySkipsFactor; + + // This transposes from OpenCV's [row=ray][col=beam] to [beam][ray] for WGPU. + // Flatten as [beam][ray]. + std::vector depthFlat(static_cast(nBeams) * nRays, 0.0f); + std::vector reflFlat(static_cast(nBeams) * nRays, 0.0f); + std::vector normalFlat(static_cast(nBeams) * nRays * 3, 0.0f); + + for (int beam = 0; beam < nBeams; ++beam) + { + for (int ray = 0; ray < nRays; ++ray) + { + if (ray >= depth.rows || beam >= depth.cols) + { + continue; + } + const size_t idx = static_cast(beam) * nRays + ray; + depthFlat[idx] = depth.at(ray, beam); + reflFlat[idx] = reflectivity.at(ray, beam); + const cv::Vec3f n = normal.at(ray, beam); + normalFlat[idx * 3 + 0] = n[0]; + normalFlat[idx * 3 + 1] = n[1]; + normalFlat[idx * 3 + 2] = n[2]; + } + } + + std::vector beamCorrectorFlat(static_cast(nBeams) * nBeams, 0.0f); + if (input.beamCorrector) + { + for (int r = 0; r < nBeams; ++r) + { + for (int c = 0; c < nBeams; ++c) + { + beamCorrectorFlat[static_cast(r) * nBeams + c] = input.beamCorrector[r][c]; + } + } + } + + std::vector windowFlat(static_cast(nFreq), 1.0f); + if (input.window) + { + for (int i = 0; i < nFreq; ++i) + { + windowFlat[static_cast(i)] = input.window[i]; + } + } + + float * result = sonar_wgpu_compute( + depthFlat.data(), normalFlat.data(), reflFlat.data(), beamCorrectorFlat.data(), + windowFlat.data(), static_cast(nBeams), static_cast(nRays), + static_cast(nFreq), static_cast(std::max(0, input.raySkips)), + static_cast(input.soundSpeed), static_cast(input.maxDistance), + static_cast(input.sourceLevel), static_cast(input.attenuation), input.sensorGain, + static_cast(input.bandwidth), input.beamCorrectorSum, area_scaler, + static_cast(input.hFOV), static_cast(input.vFOV), input.frameIndex, input.seed); + + if (!result) + { + // GPU returned null -> fall back to CPU backend + if (!this->cpuFallback) + { + this->cpuFallback = CreateComputeBackend("cpu"); + if (this->cpuFallback) + { + this->cpuFallback->Initialize(input); + std::cerr << "[sonar_wgpu] GPU compute returned null -> falling back to CPU backend." + << std::endl; + } + } + if (this->cpuFallback) + { + return this->cpuFallback->Compute(input, output); + } + return false; + } + + output.nBeams = nBeams; + output.nFreq = nFreq; + output.beamSpectrum.assign(static_cast(nBeams) * nFreq, std::complex(0.0f, 0.0f)); + + for (int b = 0; b < nBeams; ++b) + { + for (int f = 0; f < nFreq; ++f) + { + const size_t base = (static_cast(b) * nFreq + f) * 2; + output.At(b, f) = std::complex(result[base], result[base + 1]); + } + } + + sonar_wgpu_free(result, static_cast(nBeams) * nFreq * 2); + + auto stop = std::chrono::high_resolution_clock::now(); + output.computeMicros = static_cast( + std::chrono::duration_cast(stop - start).count()); + return true; +} + +} // namespace sensors +} // namespace gz diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_wgpu.hh b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_wgpu.hh new file mode 100644 index 00000000..deb79c89 --- /dev/null +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_wgpu.hh @@ -0,0 +1,28 @@ +#pragma once + +#include + +#include "sonar_compute_backend.hh" + +namespace gz +{ +namespace sensors +{ + +class WgpuComputeBackend : public ComputeBackend +{ +public: + const char * Name() const override; + + bool Initialize(const SonarComputeInput & prototype) override; + + bool Compute(const SonarComputeInput & input, SonarComputeOutput & output) override; + +private: + bool initialized{false}; + bool gpuAvailable{false}; + std::unique_ptr cpuFallback; +}; + +} // namespace sensors +} // namespace gz diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/CMakeLists.txt b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/CMakeLists.txt index 0c1b3234..04e9178f 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/CMakeLists.txt +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/CMakeLists.txt @@ -2,24 +2,24 @@ cmake_minimum_required(VERSION 3.8) project(dave_multibeam_sonar_demo) find_package(ament_cmake REQUIRED) -find_package(CUDAToolkit QUIET) -if(CUDAToolkit_FOUND) - install( - DIRECTORY launch - DESTINATION share/${PROJECT_NAME} - ) +install( + DIRECTORY launch + DESTINATION share/${PROJECT_NAME} +) - install( - DIRECTORY rviz - DESTINATION share/${PROJECT_NAME} - ) +install( + DIRECTORY rviz + DESTINATION share/${PROJECT_NAME} +) + +install( + PROGRAMS + scripts/plotdata.py + DESTINATION lib/${PROJECT_NAME} +) ament_environment_hooks( "${CMAKE_CURRENT_SOURCE_DIR}/hooks/${PROJECT_NAME}.dsv.in") -else() - message(STATUS "CUDA Toolkit not found: Skipping CUDA-specific targets") -endif() - ament_package() diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/launch/multibeam_sonar_demo.launch.py b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/launch/multibeam_sonar_demo.launch.py index 56d88cc2..c8f1aadc 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/launch/multibeam_sonar_demo.launch.py +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/launch/multibeam_sonar_demo.launch.py @@ -19,6 +19,7 @@ from launch import LaunchDescription from launch.actions import DeclareLaunchArgument from launch.actions import IncludeLaunchDescription +from launch.actions import SetEnvironmentVariable from launch.conditions import IfCondition from launch.launch_description_sources import PythonLaunchDescriptionSource from launch.substitutions import LaunchConfiguration @@ -27,6 +28,7 @@ def generate_launch_description(): + compute_backend = LaunchConfiguration("compute_backend") pkg_dave_demos = get_package_share_directory("dave_demos") multibeam_sonar_sim = IncludeLaunchDescription( PythonLaunchDescriptionSource( @@ -39,6 +41,7 @@ def generate_launch_description(): "x": "5.8", "z": "2", "yaw": "3.14", + "compute_backend": compute_backend, }.items(), ) @@ -56,8 +59,14 @@ def generate_launch_description(): return LaunchDescription( [ - multibeam_sonar_sim, + DeclareLaunchArgument( + "compute_backend", + default_value="auto", + description="Sonar backend selection: auto|wgpu|cpu", + ), DeclareLaunchArgument("rviz", default_value="true", description="Open RViz."), + SetEnvironmentVariable("DAVE_SONAR_COMPUTE_BACKEND", compute_backend), + multibeam_sonar_sim, rviz, ] ) diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/package.xml b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/package.xml index c8a3da98..3aba7743 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/package.xml +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/package.xml @@ -6,6 +6,8 @@ Woen-Sug Choi Apache 2.0 ament_cmake + python3-numpy + python3-matplotlib ament_cmake diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/scripts/plotdata.py b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/scripts/plotdata.py index 3929e443..675d62b5 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/scripts/plotdata.py +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/scripts/plotdata.py @@ -1,3 +1,5 @@ +import os + import numpy as np import matplotlib.pyplot as plt @@ -10,11 +12,30 @@ maxRange = 5 xPlotRange = 10 yPlotRange = xPlotRange * np.cos(45 * np.pi / 180) -filename = "SonarRawData_000001.csv" -beam_angle_file = "SonarRawData_beam_angles.csv" +source_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")) +inferred_root = source_root +candidates = [ + os.environ.get("DAVE_SOURCE_ROOT", ""), + os.getcwd(), + inferred_root, + os.path.join(inferred_root, "src", "dave"), +] +source_root = inferred_root +for candidate in candidates: + if candidate and os.path.exists(os.path.join(candidate, "models", "dave_worlds", "worlds")): + source_root = candidate + break + +results_dir = os.path.join(source_root, "results") +os.makedirs(results_dir, exist_ok=True) + +filename = os.path.join(results_dir, "SonarRawData_000001.csv") +beam_angle_file = os.path.join(results_dir, "SonarRawData_beam_angles.csv") bw = 29.9e3 plotSkips = 1 epsilon = 1e-10 +scatter_output_file = os.path.join(results_dir, "SonarScatter.png") +beam_output_file = os.path.join(results_dir, "SonarBeamProfiles.png") # ------------------------------------------- @@ -71,7 +92,8 @@ def load_complex_csv(filepath): plt.ylim(1.02 * np.array([-yPlotRange, yPlotRange])) plt.gca().set_facecolor("k") plt.tight_layout() -plt.show() +plt.savefig(scatter_output_file, dpi=200) +plt.close() # ------------------------------------------- # LINE PLOTS FOR INDIVIDUAL BEAMS @@ -91,4 +113,8 @@ def load_complex_csv(filepath): plt.suptitle("Beam Profiles") plt.tight_layout() -plt.show() +plt.savefig(beam_output_file, dpi=200) +plt.close() + +print(f"Saved {scatter_output_file}") +print(f"Saved {beam_output_file}") diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_system/CMakeLists.txt b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_system/CMakeLists.txt index 7bdb72c5..cf18b197 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_system/CMakeLists.txt +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_system/CMakeLists.txt @@ -2,69 +2,37 @@ cmake_minimum_required(VERSION 3.11.0 FATAL_ERROR) project(multibeam_sonar_system) find_package(ament_cmake REQUIRED) -find_package(CUDAToolkit QUIET) - -if(CUDAToolkit_FOUND) - find_package(gz-cmake3 REQUIRED) - find_package(gz-common5 REQUIRED COMPONENTS profiler) - find_package(gz-plugin2 REQUIRED COMPONENTS register) - find_package(gz-rendering8 REQUIRED OPTIONAL_COMPONENTS ogre ogre2) - find_package(gz-sim8 REQUIRED) - find_package(gz-sensors8 REQUIRED) - - set(GZ_COMMON_VER ${gz-common5_VERSION_MAJOR}) - set(GZ_PLUGIN_VER ${gz-plugin2_VERSION_MAJOR}) - set(GZ_SIM_VER ${gz-sim8_VERSION_MAJOR}) - set(GZ_RENDERING_VER ${gz-rendering8_VERSION_MAJOR}) - set(GZ_SENSORS_VER ${gz-sensors8_VERSION_MAJOR}) - - if(TARGET gz-rendering${GZ_RENDERING_VER}::ogre) - set(HAVE_OGRE TRUE) - set(GZ_RENDERING_TARGET gz-rendering${GZ_RENDERING_VER}-ogre) - add_definitions(-DWITH_OGRE) - endif() +find_package(gz-common REQUIRED COMPONENTS profiler) +find_package(gz-plugin REQUIRED COMPONENTS register) +find_package(gz-rendering REQUIRED OPTIONAL_COMPONENTS ogre ogre2) +find_package(gz-sim REQUIRED) +find_package(gz-sensors REQUIRED) + +if(TARGET gz-rendering::ogre) + add_definitions(-DWITH_OGRE) +endif() - if(TARGET gz-rendering${GZ_RENDERING_VER}::ogre2) - set(HAVE_OGRE2 TRUE) - set(GZ_RENDERING_TARGET gz-rendering${GZ_RENDERING_VER}-ogre2) - add_definitions(-DWITH_OGRE2) - endif() +if(TARGET gz-rendering::ogre2) + add_definitions(-DWITH_OGRE2) +endif() - add_subdirectory(../multibeam_sonar ${CMAKE_BINARY_DIR}/multibeam_sonar) +add_subdirectory(../multibeam_sonar ${CMAKE_BINARY_DIR}/multibeam_sonar) +if(TARGET multibeam_sonar) add_library(${PROJECT_NAME} SHARED MultibeamSonarSystem.cc) target_link_libraries(${PROJECT_NAME} - gz-common${GZ_COMMON_VER}::gz-common${GZ_COMMON_VER} - gz-plugin${GZ_PLUGIN_VER}::gz-plugin${GZ_PLUGIN_VER} - gz-sim${GZ_SIM_VER}::gz-sim${GZ_SIM_VER} - gz-sensors${GZ_SENSORS_VER}::gz-sensors${GZ_SENSORS_VER} - multibeam_sonar - ) - target_include_directories(${PROJECT_NAME} - PUBLIC - ../multibeam_sonar - ) - - ament_target_dependencies(${PROJECT_NAME} - gz-common${GZ_COMMON_VER} - ${GZ_RENDERING_TARGET} - gz-plugin${GZ_PLUGIN_VER} - gz-sim${GZ_SIM_VER} - gz-sensors${GZ_SENSORS_VER} - ) - - # Install targets - install(TARGETS ${PROJECT_NAME} - DESTINATION lib/${PROJECT_NAME} - ) - - # Environment hooks - ament_environment_hooks( - "${CMAKE_CURRENT_SOURCE_DIR}/hooks/${PROJECT_NAME}.dsv.in" + gz-common::gz-common + gz-plugin::gz-plugin + gz-sim::gz-sim + gz-sensors::gz-sensors + multibeam_sonar ) + target_include_directories(${PROJECT_NAME} PUBLIC ../multibeam_sonar) + install(TARGETS ${PROJECT_NAME} DESTINATION lib/${PROJECT_NAME}) + ament_environment_hooks("${CMAKE_CURRENT_SOURCE_DIR}/hooks/${PROJECT_NAME}.dsv.in") else() - message(STATUS "CUDA Toolkit not found: Skipping CUDA-specific targets") + message(STATUS "multibeam_sonar target unavailable: Skipping multibeam_sonar_system") endif() ament_package() diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_system/package.xml b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_system/package.xml index 8c9bf152..6f921a21 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_system/package.xml +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_system/package.xml @@ -9,7 +9,9 @@ Apache 2.0 geometry_msgs ament_cmake + multibeam_sonar ament_lint_auto + ament_lint_common ament_cmake diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/CMakeLists.txt b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/CMakeLists.txt new file mode 100644 index 00000000..511b06e1 --- /dev/null +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/CMakeLists.txt @@ -0,0 +1,55 @@ +cmake_minimum_required(VERSION 3.11) +project(wgpu_vendor) + +find_package(ament_cmake REQUIRED) +find_package(Vulkan REQUIRED) + +find_program(CARGO_EXECUTABLE cargo REQUIRED) + +set(RUST_CRATE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/sonar_wgpu_rust) +set(RUST_TARGET_DIR ${CMAKE_CURRENT_BINARY_DIR}/cargo_target) +set(RUST_PROFILE_DIR ${RUST_TARGET_DIR}/release) +set(RUST_LIB ${RUST_PROFILE_DIR}/libsonar_wgpu.a) + +# Track all Rust source files so colcon/CMake triggers cargo when they change. +file(GLOB_RECURSE RUST_SOURCES + ${RUST_CRATE_DIR}/src/*.rs + ${RUST_CRATE_DIR}/src/shaders/*.wgsl + ${RUST_CRATE_DIR}/Cargo.toml + ${RUST_CRATE_DIR}/Cargo.lock +) + +add_custom_command( + OUTPUT ${RUST_LIB} + COMMAND ${CARGO_EXECUTABLE} build --release --target-dir ${RUST_TARGET_DIR} + WORKING_DIRECTORY ${RUST_CRATE_DIR} + DEPENDS ${RUST_SOURCES} + COMMENT "Building sonar_wgpu Rust static library" + VERBATIM +) + +add_custom_target(sonar_wgpu_rust ALL DEPENDS ${RUST_LIB}) + +add_library(sonar_wgpu_rust_static STATIC IMPORTED GLOBAL) +set_target_properties(sonar_wgpu_rust_static PROPERTIES + IMPORTED_LOCATION ${RUST_LIB}) +add_dependencies(sonar_wgpu_rust_static sonar_wgpu_rust) + +add_library(wgpu_vendor INTERFACE) +add_dependencies(wgpu_vendor sonar_wgpu_rust_static) + +target_link_libraries(wgpu_vendor INTERFACE sonar_wgpu_rust_static dl pthread m Vulkan::Vulkan) + +add_library(wgpu::wgpu_vendor ALIAS wgpu_vendor) + +install(FILES ${RUST_LIB} DESTINATION lib) + +install(TARGETS wgpu_vendor EXPORT wgpu_vendorTargets) +install(EXPORT wgpu_vendorTargets + NAMESPACE wgpu:: + FILE wgpu_vendorTargets.cmake + DESTINATION share/${PROJECT_NAME}/cmake) + +ament_export_targets(wgpu_vendorTargets HAS_LIBRARY_TARGET) +ament_export_dependencies(Vulkan) +ament_package() diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/package.xml b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/package.xml new file mode 100644 index 00000000..17f5eb07 --- /dev/null +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/package.xml @@ -0,0 +1,12 @@ + + + wgpu_vendor + 0.1.0 + Vendor package exposing wgpu CMake target for DAVE sonar backend. + DAVE Maintainers + Apache-2.0 + ament_cmake + + ament_cmake + + diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/Cargo.lock b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/Cargo.lock new file mode 100644 index 00000000..9e8ac211 --- /dev/null +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/Cargo.lock @@ -0,0 +1,1156 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "ash" +version = "0.37.3+1.3.251" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e9c3835d686b0a6084ab4234fcd1b07dbf6e4767dce60874b12356a25ecd4a" +dependencies = [ + "libloading 0.7.4", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + +[[package]] +name = "block" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" + +[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + +[[package]] +name = "bytemuck" +version = "1.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfg_aliases" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" + +[[package]] +name = "codespan-reporting" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" +dependencies = [ + "termcolor", + "unicode-width", +] + +[[package]] +name = "com" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e17887fd17353b65b1b2ef1c526c83e26cd72e74f598a8dc1bee13a48f3d9f6" +dependencies = [ + "com_macros", +] + +[[package]] +name = "com_macros" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d375883580a668c7481ea6631fc1a8863e33cc335bf56bfad8d7e6d4b04b13a5" +dependencies = [ + "com_macros_support", + "proc-macro2", + "syn 1.0.109", +] + +[[package]] +name = "com_macros_support" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad899a1087a9296d5644792d7cb72b8e34c1bec8e7d4fbc002230169a6e8710c" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "core-graphics-types" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45390e6114f68f718cc7a830514a96f903cccd70d02a8f6d9f643ac4ba45afaf" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "libc", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "d3d12" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b28bfe653d79bd16c77f659305b195b82bb5ce0c0eb2a4846b82ddbd77586813" +dependencies = [ + "bitflags 2.11.0", + "libloading 0.8.9", + "winapi", +] + +[[package]] +name = "document-features" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61" +dependencies = [ + "litrs", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "foreign-types" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965" +dependencies = [ + "foreign-types-macros", + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-macros" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "foreign-types-shared" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b" + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "slab", +] + +[[package]] +name = "gl_generator" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a95dfc23a2b4a9a2f5ab41d194f8bfda3cabec42af4e39f08c339eb2a0c124d" +dependencies = [ + "khronos_api", + "log", + "xml-rs", +] + +[[package]] +name = "glow" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd348e04c43b32574f2de31c8bb397d96c9fcfa1371bd4ca6d8bdc464ab121b1" +dependencies = [ + "js-sys", + "slotmap", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "glutin_wgl_sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8098adac955faa2d31079b65dc48841251f69efd3ac25477903fc424362ead" +dependencies = [ + "gl_generator", +] + +[[package]] +name = "gpu-alloc" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbcd2dba93594b227a1f57ee09b8b9da8892c34d55aa332e034a228d0fe6a171" +dependencies = [ + "bitflags 2.11.0", + "gpu-alloc-types", +] + +[[package]] +name = "gpu-alloc-types" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98ff03b468aa837d70984d55f5d3f846f6ec31fe34bbb97c4f85219caeee1ca4" +dependencies = [ + "bitflags 2.11.0", +] + +[[package]] +name = "gpu-allocator" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f56f6318968d03c18e1bcf4857ff88c61157e9da8e47c5f29055d60e1228884" +dependencies = [ + "log", + "presser", + "thiserror", + "winapi", + "windows", +] + +[[package]] +name = "gpu-descriptor" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b89c83349105e3732062a895becfc71a8f921bb71ecbbdd8ff99263e3b53a0ca" +dependencies = [ + "bitflags 2.11.0", + "gpu-descriptor-types", + "hashbrown 0.15.5", +] + +[[package]] +name = "gpu-descriptor-types" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdf242682df893b86f33a73828fb09ca4b2d3bb6cc95249707fc684d27484b91" +dependencies = [ + "bitflags 2.11.0", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "hassle-rs" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af2a7e73e1f34c48da31fb668a907f250794837e08faa144fd24f0b8b741e890" +dependencies = [ + "bitflags 2.11.0", + "com", + "libc", + "libloading 0.8.9", + "thiserror", + "widestring", + "winapi", +] + +[[package]] +name = "hexf-parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfa686283ad6dd069f105e5ab091b04c62850d3e4cf5d67debad1933f55023df" + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", +] + +[[package]] +name = "jni-sys" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" + +[[package]] +name = "js-sys" +version = "0.3.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "khronos-egl" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6aae1df220ece3c0ada96b8153459b67eebe9ae9212258bb0134ae60416fdf76" +dependencies = [ + "libc", + "libloading 0.8.9", + "pkg-config", +] + +[[package]] +name = "khronos_api" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2db585e1d738fc771bf08a151420d3ed193d9d895a36df7f6f8a9456b911ddc" + +[[package]] +name = "libc" +version = "0.2.183" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" + +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if", + "winapi", +] + +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + +[[package]] +name = "litrs" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "malloc_buf" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb" +dependencies = [ + "libc", +] + +[[package]] +name = "metal" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5637e166ea14be6063a3f8ba5ccb9a4159df7d8f6d61c02fc3d480b1f90dcfcb" +dependencies = [ + "bitflags 2.11.0", + "block", + "core-graphics-types", + "foreign-types", + "log", + "objc", + "paste", +] + +[[package]] +name = "naga" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e536ae46fcab0876853bd4a632ede5df4b1c2527a58f6c5a4150fe86be858231" +dependencies = [ + "arrayvec", + "bit-set", + "bitflags 2.11.0", + "codespan-reporting", + "hexf-parse", + "indexmap", + "log", + "num-traits", + "rustc-hash", + "spirv", + "termcolor", + "thiserror", + "unicode-xid", +] + +[[package]] +name = "ndk-sys" +version = "0.5.0+25.2.9519653" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c196769dd60fd4f363e11d948139556a344e79d451aeb2fa2fd040738ef7691" +dependencies = [ + "jni-sys", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "objc" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1" +dependencies = [ + "malloc_buf", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "pollster" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22686f4785f02a4fcc856d3b3bb19bf6c8160d103f7a99cc258bddd0251dc7f2" + +[[package]] +name = "presser" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8cf8e6a8aa66ce33f63993ffc4ea4271eb5b0530a9002db8455ea6050c77bfa" + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "profiling" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3eb8486b569e12e2c32ad3e204dbaba5e4b5b216e9367044f25f1dba42341773" + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "range-alloc" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca45419789ae5a7899559e9512e58ca889e41f04f1f2445e9f4b290ceccd1d08" + +[[package]] +name = "raw-window-handle" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20675572f6f24e9e76ef639bc5552774ed45f1c30e2951e1e99c59888861c539" + +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags 2.11.0", +] + +[[package]] +name = "renderdoc-sys" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b30a45b0cd0bcca8037f3d0dc3421eaf95327a17cad11964fb8179b4fc4832" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "slotmap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdd58c3c93c3d278ca835519292445cb4b0d4dc59ccfdf7ceadaab3f8aeb4038" +dependencies = [ + "version_check", +] + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "sonar_wgpu" +version = "0.1.0" +dependencies = [ + "bytemuck", + "pollster", + "rayon", + "wgpu", +] + +[[package]] +name = "spirv" +version = "0.3.0+sdk-1.3.268.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eda41003dc44290527a59b13432d4a0379379fa074b70174882adfbdfd917844" +dependencies = [ + "bitflags 2.11.0", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasm-bindgen" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" +dependencies = [ + "cfg-if", + "futures-util", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn 2.0.117", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "web-sys" +version = "0.3.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "wgpu" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90e37c7b9921b75dfd26dd973fdcbce36f13dfa6e2dc82aece584e0ed48c355c" +dependencies = [ + "arrayvec", + "cfg-if", + "cfg_aliases", + "document-features", + "js-sys", + "log", + "naga", + "parking_lot", + "profiling", + "raw-window-handle", + "smallvec", + "static_assertions", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "wgpu-core", + "wgpu-hal", + "wgpu-types", +] + +[[package]] +name = "wgpu-core" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d50819ab545b867d8a454d1d756b90cd5f15da1f2943334ca314af10583c9d39" +dependencies = [ + "arrayvec", + "bit-vec", + "bitflags 2.11.0", + "cfg_aliases", + "codespan-reporting", + "document-features", + "indexmap", + "log", + "naga", + "once_cell", + "parking_lot", + "profiling", + "raw-window-handle", + "rustc-hash", + "smallvec", + "thiserror", + "web-sys", + "wgpu-hal", + "wgpu-types", +] + +[[package]] +name = "wgpu-hal" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "172e490a87295564f3fcc0f165798d87386f6231b04d4548bca458cbbfd63222" +dependencies = [ + "android_system_properties", + "arrayvec", + "ash", + "bit-set", + "bitflags 2.11.0", + "block", + "cfg_aliases", + "core-graphics-types", + "d3d12", + "glow", + "glutin_wgl_sys", + "gpu-alloc", + "gpu-allocator", + "gpu-descriptor", + "hassle-rs", + "js-sys", + "khronos-egl", + "libc", + "libloading 0.8.9", + "log", + "metal", + "naga", + "ndk-sys", + "objc", + "once_cell", + "parking_lot", + "profiling", + "range-alloc", + "raw-window-handle", + "renderdoc-sys", + "rustc-hash", + "smallvec", + "thiserror", + "wasm-bindgen", + "web-sys", + "wgpu-types", + "winapi", +] + +[[package]] +name = "wgpu-types" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1353d9a46bff7f955a680577f34c69122628cc2076e1d6f3a9be6ef00ae793ef" +dependencies = [ + "bitflags 2.11.0", + "js-sys", + "web-sys", +] + +[[package]] +name = "widestring" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72069c3113ab32ab29e5584db3c6ec55d416895e60715417b5b883a357c3e471" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" +dependencies = [ + "windows-core", + "windows-targets", +] + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "xml-rs" +version = "0.8.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ae8337f8a065cfc972643663ea4279e04e7256de865aa66fe25cec5fb912d3f" diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/Cargo.toml b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/Cargo.toml new file mode 100644 index 00000000..e09be284 --- /dev/null +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "sonar_wgpu" +version = "0.1.0" +edition = "2021" + +[lib] +crate-type = ["staticlib"] + +[dependencies] +wgpu = { version = "0.20" } +bytemuck = { version = "1", features = ["derive"] } +pollster = "0.3" +rayon = "1" + +[profile.release] +opt-level = 3 +lto = true diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/fft.rs b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/fft.rs new file mode 100644 index 00000000..1bfe5103 --- /dev/null +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/fft.rs @@ -0,0 +1,268 @@ +// Cooley-Tukey FFT and Bluestein's chirp-Z transform for non-power-of-two sizes + +/// CPU FFT: WGPU port of CUDA cuFFT. +/// Power-of-2 N: Cooley-Tukey O(N log N). Arbitrary N: Bluestein chirp-Z O(N log M). +/// CPU used for validation and CUDA non-power-of-2 fallback (GPU shader: power-of-2 only, N <= 4096). +use crate::ComplexF; +use rayon::prelude::*; + +fn is_power_of_two(x: usize) -> bool { + x != 0 && (x & (x - 1)) == 0 +} + +fn bit_reverse(mut x: usize, bits: usize) -> usize { + let mut y = 0usize; + for _ in 0..bits { + y = (y << 1) | (x & 1); + x >>= 1; + } + y +} + +/// Cooley-Tukey FFT: CUDA cuFFT Cooley-Tukey algorithm (CPU reference). +/// Requires N = power of 2. Use bluestein_fft() for arbitrary N (CUDA extension). +fn fft_in_place(data: &mut [ComplexF]) { + let n = data.len(); + if n <= 1 { + return; + } + + let bits = (n as f32).log2() as usize; + for i in 0..n { + let j = bit_reverse(i, bits); + if j > i { + data.swap(i, j); + } + } + + let mut len = 2usize; + while len <= n { + let ang = -2.0f32 * std::f32::consts::PI / len as f32; + let wlen_re = ang.cos(); + let wlen_im = ang.sin(); + + let mut i = 0usize; + while i < n { + let mut w_re = 1.0f32; + let mut w_im = 0.0f32; + let half = len / 2; + for j in 0..half { + let u = data[i + j]; + let t = data[i + j + half]; + + let v_re = t.re * w_re - t.im * w_im; + let v_im = t.re * w_im + t.im * w_re; + + data[i + j] = ComplexF { + re: u.re + v_re, + im: u.im + v_im, + }; + data[i + j + half] = ComplexF { + re: u.re - v_re, + im: u.im - v_im, + }; + + let nw_re = w_re * wlen_re - w_im * wlen_im; + let nw_im = w_re * wlen_im + w_im * wlen_re; + w_re = nw_re; + w_im = nw_im; + } + i += len; + } + len <<= 1; + } +} + +/// Inverse FFT: Matches CUDA cuFFT conjugate-symmetry trick. IFFT(x) = conj(FFT(conj(x))) / N. +/// Requires power-of-2 N matching forward FFT (CUDA cufftExecC2C standard). +fn ifft_in_place(data: &mut [ComplexF]) { + let n = data.len(); + for x in data.iter_mut() { + x.im = -x.im; + } + fft_in_place(data); + let inv = 1.0f32 / n as f32; + for x in data.iter_mut() { + x.re *= inv; + x.im = -x.im * inv; + } +} + +/// Bluestein chirp-Z: Non-CUDA extension for arbitrary N DFT. O(N log M) where M = smallest 2^k >= 2N-1. +/// ~30-50x faster than naive O(N²) DFT. CUDA cuFFT only supports power-of-2; this is CPU addon. +fn bluestein_fft(data: &mut [ComplexF]) { + let n = data.len(); + if n <= 1 { + return; + } + + // M = smallest power-of-two >= 2*n - 1 (ensures linear convolution fits) + let mut m = 1usize; + while m < 2 * n - 1 { + m <<= 1; + } + + // Chirp sequence: w[k] = exp(-j*pi*k^2/n) + // stored as (cos, sin) where the complex value is (cos, -sin) = cos - j*sin + let chirp: Vec<(f32, f32)> = (0..n) + .map(|k| { + let theta = std::f32::consts::PI * (k * k) as f32 / n as f32; + (theta.cos(), theta.sin()) // (cos θ, sin θ) so chirp = cos - j·sin + }) + .collect(); + + // a[k] = data[k] * chirp[k] = data[k] * exp(-j*pi*k^2/n), zero-padded to M + let mut a = vec![ComplexF::default(); m]; + for k in 0..n { + let (c, s) = chirp[k]; // chirp[k] = (c, -s) as complex + a[k].re = data[k].re * c + data[k].im * s; // Re[(a+jb)(c-js)] = ac+bs + a[k].im = data[k].im * c - data[k].re * s; // Im[(a+jb)(c-js)] = bc-as + } + + // h[k] = conj(chirp[k]) = exp(+j*pi*k^2/n), with wrap-around: h[M-k] = h[k] + let mut h = vec![ComplexF::default(); m]; + for k in 0..n { + let (c, s) = chirp[k]; + h[k] = ComplexF { re: c, im: s }; // conj(chirp[k]) = cos + j*sin + if k > 0 { + h[m - k] = h[k]; // symmetric fill for circular convolution + } + } + + // Circular convolution via FFT: Y = IFFT(FFT(a) * FFT(h)) + fft_in_place(&mut a); + fft_in_place(&mut h); + for i in 0..m { + let re = a[i].re * h[i].re - a[i].im * h[i].im; + let im = a[i].re * h[i].im + a[i].im * h[i].re; + a[i] = ComplexF { re, im }; + } + ifft_in_place(&mut a); + + // X[k] = a[k] * chirp[k] = a[k] * exp(-j*pi*k^2/n) + for k in 0..n { + let (c, s) = chirp[k]; + data[k].re = a[k].re * c + a[k].im * s; + data[k].im = a[k].im * c - a[k].re * s; + } +} + +/// Batched FFT: CPU dispatcher matching CUDA cuFFT batching. Routes to GPU (power-of-2 && <= 4096) or CPU fallback. +pub fn fft_batched(input: &[ComplexF], n_beams: usize, n_freq: usize) -> Vec { + // Collect into per-beam rows, transform in parallel, then flatten back. + let mut rows: Vec> = (0..n_beams) + .map(|b| input[b * n_freq..(b + 1) * n_freq].to_vec()) + .collect(); + + rows.par_iter_mut().for_each(|row| { + if is_power_of_two(row.len()) { + fft_in_place(row); + } else { + bluestein_fft(row); + } + }); + + rows.into_iter().flatten().collect() +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ComplexF; + + /// Reference O(N²) DFT used only in tests for correctness comparison. + fn naive_dft(data: &mut [ComplexF]) { + let n = data.len(); + let input: Vec = data.to_vec(); + for k in 0..n { + let mut re_sum = 0.0f32; + let mut im_sum = 0.0f32; + for j in 0..n { + let angle = -2.0 * std::f32::consts::PI * (k as f32) * (j as f32) / (n as f32); + re_sum += input[j].re * angle.cos() - input[j].im * angle.sin(); + im_sum += input[j].re * angle.sin() + input[j].im * angle.cos(); + } + data[k] = ComplexF { re: re_sum, im: im_sum }; + } + } + + fn nearly_eq(a: f32, b: f32) -> bool { + (a - b).abs() < 1e-3 + } + + fn nearly_eq_rel(a: f32, b: f32) -> bool { + let mag = b.abs().max(1e-6); + (a - b).abs() / mag < 1e-2 // 1% relative tolerance for f32 accumulation over N=399 + } + + #[test] + fn bluestein_matches_naive_dft_n3() { + let mut x_b = vec![ + ComplexF { re: 1.0, im: 0.0 }, + ComplexF { re: 2.0, im: -1.0 }, + ComplexF { re: 0.5, im: 3.0 }, + ]; + let mut x_d = x_b.clone(); + bluestein_fft(&mut x_b); + naive_dft(&mut x_d); + for (b, d) in x_b.iter().zip(x_d.iter()) { + assert!(nearly_eq(b.re, d.re), "re: {} vs {}", b.re, d.re); + assert!(nearly_eq(b.im, d.im), "im: {} vs {}", b.im, d.im); + } + } + + #[test] + fn bluestein_matches_naive_dft_n399() { + use std::f32::consts::PI; + let n = 399usize; + // Construct a chirp-like test signal + let input: Vec = (0..n) + .map(|k| ComplexF { + re: ((k as f32) * PI / 50.0).cos(), + im: ((k as f32) * PI / 75.0).sin(), + }) + .collect(); + let mut x_b = input.clone(); + let mut x_d = input.clone(); + bluestein_fft(&mut x_b); + naive_dft(&mut x_d); + // Compare first 5 and last 5 bins + for &i in &[0, 1, 2, 3, 4, 394, 395, 396, 397, 398] { + assert!(nearly_eq_rel(x_b[i].re, x_d[i].re), + "bin {i} re: {} vs {}", x_b[i].re, x_d[i].re); + assert!(nearly_eq_rel(x_b[i].im, x_d[i].im), + "bin {i} im: {} vs {}", x_b[i].im, x_d[i].im); + } + } + + #[test] + fn bluestein_impulse_gives_constant_spectrum() { + // DFT of [1, 0, 0, ..., 0] = [1, 1, 1, ..., 1] + let n = 399usize; + let mut x = vec![ComplexF::default(); n]; + x[0].re = 1.0; + bluestein_fft(&mut x); + for (i, v) in x.iter().enumerate() { + assert!(nearly_eq(v.re, 1.0), "bin {i} re = {} (expected 1)", v.re); + assert!(nearly_eq(v.im, 0.0), "bin {i} im = {} (expected 0)", v.im); + } + } +} + + +/* + +X[k] = Σ x[n] · e^(-j·2π·kn/N) + + = Σ x[n] · e^(-jπ(k² + n² - (k-n)²)/N) + + = e^(-jπk²/N) · Σ [x[n] · e^(-jπn²/N)] · e^(+jπ(k-n)²/N) + ─────────── ───────────────────── ──────────────── + post-chirp a[n] (pre-chirped) h[k-n] (filter) + +So the full Bluestein algorithm is: + 1. a[n] = x[n] · e^(-jπn²/N) premultiply by chirp + 2. Y[k] = (a ★ h)[k] convolve with conjugate chirp filter + 3. X[k] = Y[k] · e^(-jπk²/N) postmultiply by chirp + +*/ \ No newline at end of file diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/lib.rs b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/lib.rs new file mode 100644 index 00000000..a737df3f --- /dev/null +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/lib.rs @@ -0,0 +1,581 @@ +/// C-FFI interface, Buffer initialization and write ups and reads + Bind groups created + encoder setup + memory free +/// + Compute passes for each shader + gpu/cpu path selection + +mod fft; // CPU FFT + Bluestein +mod pipeline; // GPU context + buffer management + +use bytemuck::{Pod, Zeroable}; + +#[derive(Clone, Copy, Default)] +pub struct ComplexF { + pub re: f32, + pub im: f32, +} +use std::sync::atomic::{AtomicU64, Ordering}; +use wgpu::util::DeviceExt; + +fn is_power_of_two(x: usize) -> bool { + x != 0 && (x & (x - 1)) == 0 +} + +#[repr(C)] +#[derive(Clone, Copy, Pod, Zeroable)] +struct BackscatterParams { + n_beams: u32, + n_rays: u32, + n_freq: u32, + ray_skips: u32, + sound_speed: f32, + max_distance: f32, + source_level: f32, + attenuation: f32, + sensor_gain: f32, + bandwidth: f32, + seed_lo: u32, + seed_hi: u32, + frame_lo: u32, // WGSL has no native u64 type. + frame_hi: u32, // Split frame index into two u32 - frame_lo and frame_hi + area_scaler: f32, + h_fov: f32, + v_fov: f32, + _pad0: u32, + _pad1: u32, + _pad2: u32, +} + +#[repr(C)] +#[derive(Clone, Copy, Pod, Zeroable)] +struct MatmulParams { + n_beams: u32, + n_freq: u32, + beam_corrector_sum: f32, +} + +#[repr(C)] +#[derive(Clone, Copy, Pod, Zeroable)] +struct FftParams { + n_beams: u32, + n_freq: u32, + log2_n: u32, +} + +// REMOVE THIS, UNNCESSARY +/// Single-buffer i32 readback kept for CPU-fallback / test paths. +#[allow(dead_code)] +fn readback_i32( + device: &wgpu::Device, + queue: &wgpu::Queue, + src: &wgpu::Buffer, + len: usize, +) -> Option> { + let bytes = (len * std::mem::size_of::()) as u64; + let staging = device.create_buffer(&wgpu::BufferDescriptor { + label: Some("readback_i32"), + size: bytes, + usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("readback_i32_encoder"), + }); + encoder.copy_buffer_to_buffer(src, 0, &staging, 0, bytes); + queue.submit(std::iter::once(encoder.finish())); + device.poll(wgpu::Maintain::Wait); + let slice = staging.slice(..); + slice.map_async(wgpu::MapMode::Read, |_| {}); + device.poll(wgpu::Maintain::Wait); + let data = slice.get_mapped_range(); + let out = bytemuck::cast_slice::(&data).to_vec(); + drop(data); + staging.unmap(); + Some(out) +} + +#[allow(dead_code)] +/// Read two i32 buffers back to CPU in **one** encoder+submit+poll pair. +/// Returns (re_vec, im_vec). +fn readback_i32_pair( + device: &wgpu::Device, + queue: &wgpu::Queue, + src_re: &wgpu::Buffer, + src_im: &wgpu::Buffer, + len: usize, +) -> Option<(Vec, Vec)> { + let bytes = (len * std::mem::size_of::()) as u64; + let stg_re = device.create_buffer(&wgpu::BufferDescriptor { + label: Some("stg_re"), + size: bytes, + usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + let stg_im = device.create_buffer(&wgpu::BufferDescriptor { + label: Some("stg_im"), + size: bytes, + usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + let mut enc = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("readback_i32_pair"), + }); + enc.copy_buffer_to_buffer(src_re, 0, &stg_re, 0, bytes); + enc.copy_buffer_to_buffer(src_im, 0, &stg_im, 0, bytes); + queue.submit(std::iter::once(enc.finish())); + device.poll(wgpu::Maintain::Wait); + + stg_re.slice(..).map_async(wgpu::MapMode::Read, |_| {}); + stg_im.slice(..).map_async(wgpu::MapMode::Read, |_| {}); + device.poll(wgpu::Maintain::Wait); + + let re_data = stg_re.slice(..).get_mapped_range(); + let im_data = stg_im.slice(..).get_mapped_range(); + let re = bytemuck::cast_slice::(&re_data).to_vec(); + let im = bytemuck::cast_slice::(&im_data).to_vec(); + drop(re_data); + drop(im_data); + stg_re.unmap(); + stg_im.unmap(); + Some((re, im)) +} + +// REMOVE THIS, UNNCESSARY +#[allow(dead_code)] +/// Read two f32 buffers back to CPU in **one** encoder+submit+poll pair. +fn readback_f32_pair( + device: &wgpu::Device, + queue: &wgpu::Queue, + src_re: &wgpu::Buffer, + src_im: &wgpu::Buffer, + len: usize, +) -> Option<(Vec, Vec)> { + let bytes = (len * std::mem::size_of::()) as u64; + let stg_re = device.create_buffer(&wgpu::BufferDescriptor { + label: Some("stg_re_f32"), + size: bytes, + usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + let stg_im = device.create_buffer(&wgpu::BufferDescriptor { + label: Some("stg_im_f32"), + size: bytes, + usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + let mut enc = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("readback_f32_pair"), + }); + enc.copy_buffer_to_buffer(src_re, 0, &stg_re, 0, bytes); + enc.copy_buffer_to_buffer(src_im, 0, &stg_im, 0, bytes); + queue.submit(std::iter::once(enc.finish())); + device.poll(wgpu::Maintain::Wait); + + stg_re.slice(..).map_async(wgpu::MapMode::Read, |_| {}); + stg_im.slice(..).map_async(wgpu::MapMode::Read, |_| {}); + device.poll(wgpu::Maintain::Wait); + + let re_data = stg_re.slice(..).get_mapped_range(); + let im_data = stg_im.slice(..).get_mapped_range(); + let re = bytemuck::cast_slice::(&re_data).to_vec(); + let im = bytemuck::cast_slice::(&im_data).to_vec(); + drop(re_data); + drop(im_data); + stg_re.unmap(); + stg_im.unmap(); + Some((re, im)) +} + +// REMOVE THIS, UNNCESSARY +/// Single-buffer f32 readback kept for CPU-fallback / test paths. +#[allow(dead_code)] +fn readback_f32( + device: &wgpu::Device, + queue: &wgpu::Queue, + src: &wgpu::Buffer, + len: usize, +) -> Option> { + let bytes = (len * std::mem::size_of::()) as u64; + let staging = device.create_buffer(&wgpu::BufferDescriptor { + label: Some("readback_f32"), + size: bytes, + usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + + let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("readback_f32_encoder"), + }); + encoder.copy_buffer_to_buffer(src, 0, &staging, 0, bytes); + queue.submit(std::iter::once(encoder.finish())); + device.poll(wgpu::Maintain::Wait); + + let slice = staging.slice(..); + slice.map_async(wgpu::MapMode::Read, |_| {}); + device.poll(wgpu::Maintain::Wait); + + let data = slice.get_mapped_range(); + let out = bytemuck::cast_slice::(&data).to_vec(); + drop(data); + staging.unmap(); + Some(out) +} + +/// The function receives flat arrays from C++ and returns a heap-allocated array. +#[no_mangle] +pub extern "C" fn sonar_wgpu_compute( + depth_flat: *const f32, + normal_flat: *const f32, + reflectivity_flat: *const f32, + beam_corrector_flat: *const f32, + window_flat: *const f32, + n_beams: u32, + n_rays: u32, + n_freq: u32, + ray_skips: u32, + sound_speed: f32, + max_distance: f32, + source_level: f32, + attenuation: f32, + sensor_gain: f32, + bandwidth: f32, + beam_corrector_sum: f32, + area_scaler: f32, + h_fov: f32, + v_fov: f32, + frame_index: u64, + seed: u64, +) -> *mut f32 { + if depth_flat.is_null() + || normal_flat.is_null() + || reflectivity_flat.is_null() + || beam_corrector_flat.is_null() + || window_flat.is_null() + { + return std::ptr::null_mut(); + } + + let n_beams_us = n_beams as usize; + let n_rays_us = n_rays as usize; + let n_freq_us = n_freq as usize; + + if n_beams_us == 0 || n_rays_us == 0 || n_freq_us == 0 { + return std::ptr::null_mut(); + } + + use std::sync::Once; + + let depth_len = n_beams_us * n_rays_us; + let normal_len = depth_len * 3; + let refl_len = depth_len; + let bc_len = n_beams_us * n_beams_us; + let spectrum_len = n_beams_us * n_freq_us; + + let depth = unsafe { std::slice::from_raw_parts(depth_flat, depth_len) }; + let normals = unsafe { std::slice::from_raw_parts(normal_flat, normal_len) }; + let reflectivity = unsafe { std::slice::from_raw_parts(reflectivity_flat, refl_len) }; + let beam_corrector = unsafe { std::slice::from_raw_parts(beam_corrector_flat, bc_len) }; + + let ctx = match pipeline::get_or_init() { + Some(c) => { + static GPU_LOG: Once = Once::new(); + GPU_LOG.call_once(|| eprintln!("[sonar_wgpu] GPU device acquired -> running on GPU.")); + c + } + None => { + static FALLBACK_LOG: Once = Once::new(); + FALLBACK_LOG.call_once(|| eprintln!("[sonar_wgpu] GPU init failed -> returning null (C++ will use CPU backend).")); + return std::ptr::null_mut(); + } + }; + let device = &ctx.device; + let queue = &ctx.queue; + + static GPU_FRAME_COUNT: AtomicU64 = AtomicU64::new(0); + let gpu_frame = GPU_FRAME_COUNT.fetch_add(1, Ordering::Relaxed); + let gpu_t0 = std::time::Instant::now(); + + let window_vals = unsafe { std::slice::from_raw_parts(window_flat, n_freq_us) }; + + // ---- Per-frame uniform buffers (tiny, always recreated) ---- + let backscatter_params = BackscatterParams { + n_beams, + n_rays, + n_freq, + ray_skips, + sound_speed, + max_distance, + source_level, + attenuation, + sensor_gain, + bandwidth, + seed_lo: (seed & 0xFFFF_FFFF) as u32, + seed_hi: (seed >> 32) as u32, + frame_lo: (frame_index & 0xFFFF_FFFF) as u32, + frame_hi: (frame_index >> 32) as u32, + area_scaler, + h_fov, + v_fov, + _pad0: 0, + _pad1: 0, + _pad2: 0, + }; + let bs_param_buf = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: Some("bs_param"), + contents: bytemuck::bytes_of(&backscatter_params), + usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, + }); + let mm_params = MatmulParams { n_beams, n_freq, beam_corrector_sum }; + let mm_param_buf = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: Some("mm_param"), + contents: bytemuck::bytes_of(&mm_params), + usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, + }); + + // REMOVE THIS, UNNCESSARY + // ---- Persistent data buffers: allocate once per dimension set, reuse every frame ---- + let mut buf_guard = ctx.buffers.lock().unwrap(); + { + let needs_alloc = buf_guard.as_ref().map_or(true, |b| { + b.n_beams != n_beams || b.n_rays != n_rays || b.n_freq != n_freq + }); + if needs_alloc { + *buf_guard = Some(pipeline::SonarBuffers::new(device, n_beams, n_rays, n_freq)); + eprintln!("[sonar_wgpu] Persistent GPU buffers allocated for {}×{}×{}", + n_beams, n_rays, n_freq); + } + } + let buf = buf_guard.as_ref().unwrap(); + + // Upload frame data into persistent buffers (DMA, no GPU alloc) + queue.write_buffer(&buf.depth_buf, 0, bytemuck::cast_slice(depth)); + queue.write_buffer(&buf.normal_buf, 0, bytemuck::cast_slice(normals)); + queue.write_buffer(&buf.refl_buf, 0, bytemuck::cast_slice(reflectivity)); + queue.write_buffer(&buf.window_buf, 0, bytemuck::cast_slice(window_vals)); + queue.write_buffer(&buf.bc_buf, 0, bytemuck::cast_slice(beam_corrector)); + // Zero atomic accumulators (reused each frame) + let zeros_i32 = vec![0i32; spectrum_len]; + queue.write_buffer(&buf.out_re_i32, 0, bytemuck::cast_slice(&zeros_i32)); + queue.write_buffer(&buf.out_im_i32, 0, bytemuck::cast_slice(&zeros_i32)); + + // ---- Bind groups (metadata only -> no GPU allocation) ---- + let bs_bg = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("bs_bg"), + layout: &ctx.bs_bgl, + entries: &[ + wgpu::BindGroupEntry { binding: 0, resource: bs_param_buf.as_entire_binding() }, + wgpu::BindGroupEntry { binding: 1, resource: buf.depth_buf.as_entire_binding() }, + wgpu::BindGroupEntry { binding: 2, resource: buf.normal_buf.as_entire_binding() }, + wgpu::BindGroupEntry { binding: 3, resource: buf.refl_buf.as_entire_binding() }, + wgpu::BindGroupEntry { binding: 4, resource: buf.out_re_i32.as_entire_binding() }, + wgpu::BindGroupEntry { binding: 5, resource: buf.out_im_i32.as_entire_binding() }, + wgpu::BindGroupEntry { binding: 6, resource: buf.window_buf.as_entire_binding() }, + ], + }); + let conv_bg = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("conv_bg"), + layout: &ctx.convert_bgl, + entries: &[ + wgpu::BindGroupEntry { binding: 0, resource: buf.out_re_i32.as_entire_binding() }, + wgpu::BindGroupEntry { binding: 1, resource: buf.out_im_i32.as_entire_binding() }, + wgpu::BindGroupEntry { binding: 2, resource: buf.mm_re_in.as_entire_binding() }, + wgpu::BindGroupEntry { binding: 3, resource: buf.mm_im_in.as_entire_binding() }, + ], + }); + let mm_re_bg = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("mm_re_bg"), + layout: &ctx.mm_bgl, + entries: &[ + wgpu::BindGroupEntry { binding: 0, resource: mm_param_buf.as_entire_binding() }, + wgpu::BindGroupEntry { binding: 1, resource: buf.bc_buf.as_entire_binding() }, + wgpu::BindGroupEntry { binding: 2, resource: buf.mm_re_in.as_entire_binding() }, + wgpu::BindGroupEntry { binding: 3, resource: buf.mm_re_out.as_entire_binding() }, + ], + }); + let mm_im_bg = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("mm_im_bg"), + layout: &ctx.mm_bgl, + entries: &[ + wgpu::BindGroupEntry { binding: 0, resource: mm_param_buf.as_entire_binding() }, + wgpu::BindGroupEntry { binding: 1, resource: buf.bc_buf.as_entire_binding() }, + wgpu::BindGroupEntry { binding: 2, resource: buf.mm_im_in.as_entire_binding() }, + wgpu::BindGroupEntry { binding: 3, resource: buf.mm_im_out.as_entire_binding() }, + ], + }); + + let spectrum_bytes = (spectrum_len * std::mem::size_of::()) as u64; + let gpu_fft = n_freq_us <= 4096 && is_power_of_two(n_freq_us); + + // ---- Single command encoder: all passes + staging copy in one GPU submission ---- + let mut enc = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("sonar_enc"), + }); + + // Backscatter dispatch: depth/normals/refl → i32 fixed-point spectrum + { + let mut pass = enc.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: Some("bs_pass"), + timestamp_writes: None, + }); + pass.set_pipeline(&ctx.bs_pipeline); + pass.set_bind_group(0, &bs_bg, &[]); + let gx = (n_beams + 7) / 8; + let ray_step = std::cmp::max(1, ray_skips); + let reduced_rays = (n_rays + ray_step - 1) / ray_step; + let gy = (reduced_rays + 7) / 8; + pass.dispatch_workgroups(gx, gy, 1); + } + + // Convert dispatch: i32 fixed-point → f32 (GPU-side, eliminates CPU readback round-trip) + { + let mut pass = enc.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: Some("conv_pass"), + timestamp_writes: None, + }); + pass.set_pipeline(&ctx.convert_pipeline); + pass.set_bind_group(0, &conv_bg, &[]); + let gx = (spectrum_len as u32 + 63) / 64; + pass.dispatch_workgroups(gx, 1, 1); + } + + // Matmul dispatch: beam-corrector matrix multiply, re and im in same encoder + { + let gx = (n_freq + 15) / 16; + let gy = (n_beams + 15) / 16; + { + let mut pass = enc.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: Some("mm_re_pass"), + timestamp_writes: None, + }); + pass.set_pipeline(&ctx.mm_pipeline); + pass.set_bind_group(0, &mm_re_bg, &[]); + pass.dispatch_workgroups(gx, gy, 1); + } + { + let mut pass = enc.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: Some("mm_im_pass"), + timestamp_writes: None, + }); + pass.set_pipeline(&ctx.mm_pipeline); + pass.set_bind_group(0, &mm_im_bg, &[]); + pass.dispatch_workgroups(gx, gy, 1); + } + } + + // FFT Dispatch (optional, only if n_freq is power-of-two and <= 4096); re/im in same encoder + if gpu_fft { + // copy MM output → FFT buffers → dispatch fft shader → copy to staging + enc.copy_buffer_to_buffer(&buf.mm_re_out, 0, &buf.p_re_buf, 0, spectrum_bytes); + enc.copy_buffer_to_buffer(&buf.mm_im_out, 0, &buf.p_im_buf, 0, spectrum_bytes); + + let fft_params = FftParams { n_beams, n_freq, log2_n: n_freq_us.ilog2() }; + let fft_param_buf = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: Some("fft_param"), + contents: bytemuck::bytes_of(&fft_params), + usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, + }); + let fft_bg = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("fft_bg"), + layout: &ctx.fft_bgl, + entries: &[ + wgpu::BindGroupEntry { binding: 0, resource: fft_param_buf.as_entire_binding() }, + wgpu::BindGroupEntry { binding: 1, resource: buf.p_re_buf.as_entire_binding() }, + wgpu::BindGroupEntry { binding: 2, resource: buf.p_im_buf.as_entire_binding() }, + ], + }); + { + let mut pass = enc.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: Some("fft_pass"), + timestamp_writes: None, + }); + pass.set_pipeline(&ctx.fft_pipeline); + pass.set_bind_group(0, &fft_bg, &[]); + pass.dispatch_workgroups(n_beams, 1, 1); + } + // ... fft dispatch ... then copy FFT output to staging for readback, all in same encoder + enc.copy_buffer_to_buffer(&buf.p_re_buf, 0, &buf.stg_re, 0, spectrum_bytes); + enc.copy_buffer_to_buffer(&buf.p_im_buf, 0, &buf.stg_im, 0, spectrum_bytes); + } else { + // CPU FFT path: copy MM output directly to staging + enc.copy_buffer_to_buffer(&buf.mm_re_out, 0, &buf.stg_re, 0, spectrum_bytes); + enc.copy_buffer_to_buffer(&buf.mm_im_out, 0, &buf.stg_im, 0, spectrum_bytes); + } + + // Submit the entire frame in a single GPU command -> POLL 1 + queue.submit(std::iter::once(enc.finish())); + device.poll(wgpu::Maintain::Wait); + + // map_async is asynchronous -> it requests CPU access to the buffer but doesn't block. + // Map staging buffers (GPU work already done above) -> POLL 2 + buf.stg_re.slice(..).map_async(wgpu::MapMode::Read, |_| {}); + buf.stg_im.slice(..).map_async(wgpu::MapMode::Read, |_| {}); + device.poll(wgpu::Maintain::Wait); + + let (mut p_re, mut p_im) = { + let re_data = buf.stg_re.slice(..).get_mapped_range(); + let im_data = buf.stg_im.slice(..).get_mapped_range(); + let re = bytemuck::cast_slice::(&re_data).to_vec(); + let im = bytemuck::cast_slice::(&im_data).to_vec(); + (re, im) + }; + buf.stg_re.unmap(); + buf.stg_im.unmap(); + + // CPU FFT fallback (n_freq not power-of-two; uses Bluestein O(N log N)) + if !gpu_fft { + let fft_t0 = std::time::Instant::now(); + let mut complex = vec![ComplexF::default(); spectrum_len]; + for i in 0..spectrum_len { + complex[i].re = p_re[i]; + complex[i].im = p_im[i]; + } + let fallback = fft::fft_batched(&complex, n_beams_us, n_freq_us); + for i in 0..spectrum_len { + p_re[i] = fallback[i].re; + p_im[i] = fallback[i].im; + } + if gpu_frame == 0 || gpu_frame % 50 == 49 { + eprintln!("[sonar_wgpu] CPU FFT: {:6.1} ms ({} beams x {} freq)", + fft_t0.elapsed().as_secs_f64() * 1000.0, n_beams, n_freq); + } + } + + // Matches CUDA's post-FFT * delta_f scaling + let delta_f = bandwidth / (n_freq as f32); + for i in 0..spectrum_len { + p_re[i] *= delta_f; + p_im[i] *= delta_f; + } + + // Pack into interleaved [re0, im0, re1, im1, ...] for C++ caller + let mut out = vec![0.0f32; spectrum_len * 2]; + for b in 0..n_beams_us { + for f in 0..n_freq_us { + let idx = b * n_freq_us + f; + out[idx * 2] = p_re[idx]; + out[idx * 2 + 1] = p_im[idx]; + } + } + + let elapsed_ms = gpu_t0.elapsed().as_secs_f64() * 1000.0; + // Log on the very first frame and then every 50 frames. + if gpu_frame == 0 || gpu_frame % 50 == 49 { + let active_rays = (n_rays + std::cmp::max(1, ray_skips) - 1) / std::cmp::max(1, ray_skips); + eprintln!( + "[sonar_wgpu] GPU #{:<5} | {:6.1} ms | {} beams × {} rays × {} freq", + gpu_frame + 1, elapsed_ms, n_beams, active_rays, n_freq + ); + } + + let mut boxed = out.into_boxed_slice(); + let ptr = boxed.as_mut_ptr(); + std::mem::forget(boxed); + ptr +} + +#[no_mangle] +pub extern "C" fn sonar_wgpu_free(ptr: *mut f32, len: usize) { + if ptr.is_null() || len == 0 { + return; + } + unsafe { + let _ = Vec::from_raw_parts(ptr, len, len); + } +} diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/pipeline.rs b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/pipeline.rs new file mode 100644 index 00000000..30d20e9a --- /dev/null +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/pipeline.rs @@ -0,0 +1,424 @@ +/// Adapters, device, queue setups + BUFFERS SETUP + PIPELINE SETUPS + PIPELINE COMPILE + GPU CONTEXT INIT all in pipeline.rs +/// + Shaders compiled + +use std::sync::OnceLock; + +/// GPU buffer lifetime: persistent allocation across frames via queue.write_buffer. +/// Buffers reallocated only when sonar dimensions change. +pub struct SonarBuffers { + pub n_beams: u32, + pub n_rays: u32, + pub n_freq: u32, + // Input data (written via queue.write_buffer each frame) + pub depth_buf: wgpu::Buffer, + pub normal_buf: wgpu::Buffer, + pub refl_buf: wgpu::Buffer, + pub window_buf: wgpu::Buffer, + pub bc_buf: wgpu::Buffer, + // Backscatter atomic i32 accumulators (zeroed via write_buffer each frame) + pub out_re_i32: wgpu::Buffer, + pub out_im_i32: wgpu::Buffer, + // Convert output / matmul input (f32) + pub mm_re_in: wgpu::Buffer, + pub mm_im_in: wgpu::Buffer, + // Matmul output / FFT input + pub mm_re_out: wgpu::Buffer, + pub mm_im_out: wgpu::Buffer, + // FFT in-place buffers + pub p_re_buf: wgpu::Buffer, + pub p_im_buf: wgpu::Buffer, + // Host-visible staging buffers for final readback + pub stg_re: wgpu::Buffer, + pub stg_im: wgpu::Buffer, +} + +impl SonarBuffers { + /// Buffer allocation for 4 pipeline stages: backscatter, convert, matmul, FFT. + /// Chain: backscatter→convert→matmul→FFT→readback. + pub fn new(device: &wgpu::Device, n_beams: u32, n_rays: u32, n_freq: u32) -> Self { + let depth_ray = (n_beams as usize) * (n_rays as usize); + let spectrum = (n_beams as usize) * (n_freq as usize); + let bc = (n_beams as usize) * (n_beams as usize); + let f32_bytes = |n: usize| (n * std::mem::size_of::()) as u64; + let i32_bytes = |n: usize| (n * std::mem::size_of::()) as u64; + + SonarBuffers { + n_beams, n_rays, n_freq, + depth_buf: device.create_buffer(&wgpu::BufferDescriptor { + label: Some("pb_depth"), + size: f32_bytes(depth_ray), + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }), + normal_buf: device.create_buffer(&wgpu::BufferDescriptor { + label: Some("pb_normal"), + size: f32_bytes(depth_ray * 3), + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }), + refl_buf: device.create_buffer(&wgpu::BufferDescriptor { + label: Some("pb_refl"), + size: f32_bytes(depth_ray), + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }), + window_buf: device.create_buffer(&wgpu::BufferDescriptor { + label: Some("pb_window"), + size: f32_bytes(n_freq as usize), + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }), + bc_buf: device.create_buffer(&wgpu::BufferDescriptor { + label: Some("pb_bc"), + size: f32_bytes(bc), + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }), + out_re_i32: device.create_buffer(&wgpu::BufferDescriptor { + label: Some("pb_re_i32"), + size: i32_bytes(spectrum), + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }), + out_im_i32: device.create_buffer(&wgpu::BufferDescriptor { + label: Some("pb_im_i32"), + size: i32_bytes(spectrum), + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }), + mm_re_in: device.create_buffer(&wgpu::BufferDescriptor { + label: Some("pb_mm_re_in"), + size: f32_bytes(spectrum), + usage: wgpu::BufferUsages::STORAGE, + mapped_at_creation: false, + }), + mm_im_in: device.create_buffer(&wgpu::BufferDescriptor { + label: Some("pb_mm_im_in"), + size: f32_bytes(spectrum), + usage: wgpu::BufferUsages::STORAGE, + mapped_at_creation: false, + }), + mm_re_out: device.create_buffer(&wgpu::BufferDescriptor { + label: Some("pb_mm_re_out"), + size: f32_bytes(spectrum), + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC, + mapped_at_creation: false, + }), + mm_im_out: device.create_buffer(&wgpu::BufferDescriptor { + label: Some("pb_mm_im_out"), + size: f32_bytes(spectrum), + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC, + mapped_at_creation: false, + }), + p_re_buf: device.create_buffer(&wgpu::BufferDescriptor { + label: Some("pb_p_re"), + size: f32_bytes(spectrum), + usage: wgpu::BufferUsages::STORAGE + | wgpu::BufferUsages::COPY_SRC + | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }), + p_im_buf: device.create_buffer(&wgpu::BufferDescriptor { + label: Some("pb_p_im"), + size: f32_bytes(spectrum), + usage: wgpu::BufferUsages::STORAGE + | wgpu::BufferUsages::COPY_SRC + | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }), + stg_re: device.create_buffer(&wgpu::BufferDescriptor { + label: Some("pb_stg_re"), + size: f32_bytes(spectrum), + usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }), + stg_im: device.create_buffer(&wgpu::BufferDescriptor { + label: Some("pb_stg_im"), + size: f32_bytes(spectrum), + usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }), + } + } +} + +/// GPU compute context: device handle + compiled shader pipelines. +/// Four pipelines compiled once at init: backscatter, convert, matmul, FFT. +pub struct GpuContext { + pub device: wgpu::Device, // the GPU -> used to create everything + pub queue: wgpu::Queue, // the command queue -> used to submit work + + // Backscatter kernel: ray-surface interactions with Lambert scatter. + pub bs_pipeline: wgpu::ComputePipeline, // compiled backscatter shader + pub bs_bgl: wgpu::BindGroupLayout, // describes bs shader's buffer slots + + // i32→f32 conversion: fixed-point atomic backscatter output to float spectrum. + pub convert_pipeline: wgpu::ComputePipeline, + pub convert_bgl: wgpu::BindGroupLayout, + + // Beam correction GEMM: tiled matrix multiply for spectrum. + pub mm_pipeline: wgpu::ComputePipeline, + pub mm_bgl: wgpu::BindGroupLayout, + + // FFT kernel: power-of-2 only, N≤4096 (4096 is the shared memory limit). CPU fallback via Bluestein for arbitrary N. + pub fft_pipeline: wgpu::ComputePipeline, + pub fft_bgl: wgpu::BindGroupLayout, + + // SonarBuffers are None until first compute call when dimensions are known. + // Reallocated automatically if dimensions change between frames. + // Persistent GPU buffers across frames: CUDA cudaMalloc equivalent (allocated once, reused across ≥1 frames with same dimensions). + pub buffers: std::sync::Mutex>, +} + +static GPU_CONTEXT: OnceLock> = OnceLock::new(); + +/// Bind group layout entry: storage buffer descriptor. +fn make_storage_entry(binding: u32, read_only: bool) -> wgpu::BindGroupLayoutEntry { + wgpu::BindGroupLayoutEntry { + binding, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only }, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + } +} + +/// Uniform entry: shader parameter buffer. +fn make_uniform_entry(binding: u32) -> wgpu::BindGroupLayoutEntry { + wgpu::BindGroupLayoutEntry { + binding, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + } +} + +/// Bind group layout builder: defines buffer bindings for each shader. +fn build_bgls(device: &wgpu::Device) -> ( + wgpu::BindGroupLayout, + wgpu::BindGroupLayout, + wgpu::BindGroupLayout, + wgpu::BindGroupLayout, +) { + // Backscatter kernel params: uniform params, depth, normals, reflectivity, output accumulators, window. + let bs_bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("bs_bgl"), + entries: &[ + make_uniform_entry(0), // (params) + make_storage_entry(1, true), // (depth) true = read_only + make_storage_entry(2, true), // (normal) + make_storage_entry(3, true), // (reflectivity) + make_storage_entry(4, false), // (out_re_i32) false = read_write + make_storage_entry(5, false), // (out_im_i32) + make_storage_entry(6, true), // (window) + ], + }); + + // Matmul (GEMM) kernel params: uniform params, beam_corrector, spectrum_in, spectrum_out. + let mm_bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("mm_bgl"), + entries: &[ + make_uniform_entry(0), + make_storage_entry(1, true), + make_storage_entry(2, true), + make_storage_entry(3, false), + ], + }); + + // FFT kernel params: uniform params, spectrum_re, spectrum_im (in-place). + let fft_bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("fft_bgl"), + entries: &[ + make_uniform_entry(0), + make_storage_entry(1, false), + make_storage_entry(2, false), + ], + }); + + // Convert kernel params: int32 input (re/im), float32 output (re/im). + let convert_bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("convert_bgl"), + entries: &[ + make_storage_entry(0, true), + make_storage_entry(1, true), + make_storage_entry(2, false), + make_storage_entry(3, false), + ], + }); + + (bs_bgl, mm_bgl, fft_bgl, convert_bgl) +} + +/// Pipeline compilation: shader module compilation + pipeline layout binding (one-time init). +fn compile_pipeline( + device: &wgpu::Device, + label: &str, + wgsl: &str, + bgl: &wgpu::BindGroupLayout, +) -> wgpu::ComputePipeline { + let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: Some(label), + source: wgpu::ShaderSource::Wgsl(wgsl.into()), + }); + let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some(label), + bind_group_layouts: &[bgl], + push_constant_ranges: &[], + }); + device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: Some(label), + layout: Some(&layout), + module: &shader, + entry_point: "main", + compilation_options: wgpu::PipelineCompilationOptions::default(), + }) +} + +/// GPU context initialization: Vulkan adapter enumeration, device request. +fn init_gpu_context() -> Option { + // TODO: These environment variable overrides are NVIDIA/Vulkan-specific (Linux only). + // Replace with wgpu::Backends::all() so the adapter selection works automatically + // on any platform (Metal on macOS, DX12 on Windows, Vulkan on Linux) without + // needing manual ICD path configuration. + if std::env::var("VK_ICD_FILENAMES").is_err() { + unsafe { + std::env::set_var("VK_ICD_FILENAMES", "/usr/share/vulkan/icd.d/nvidia_icd.json"); + } + } + if std::env::var("VK_LAYER_PATH").is_err() { + unsafe { + std::env::set_var("DISABLE_LAYER_NV_optimus", "1"); + } + } + + // Catch panic to prevent GPU init failure from crashing Gazebo. + let result = std::panic::catch_unwind(|| { + // TODO: Replace wgpu::Backends::VULKAN with wgpu::Backends::all() to support + // Metal (macOS), DX12 (Windows), and other backends automatically. + // The explicit VULKAN flag means this will fail silently on non-Vulkan platforms. + let instance = wgpu::Instance::new(wgpu::InstanceDescriptor { + backends: wgpu::Backends::VULKAN, + ..Default::default() + }); + + // TODO: Same as above — enumerate_adapters(wgpu::Backends::all()) would pick up + // the best available backend on any platform instead of Vulkan-only. + let adapters: Vec<_> = instance + .enumerate_adapters(wgpu::Backends::VULKAN) + .into_iter() + .collect(); + + eprintln!("[sonar_wgpu] Vulkan adapters found: {}", adapters.len()); + for a in &adapters { + let info = a.get_info(); + eprintln!("[sonar_wgpu] {:?} -> {}", info.device_type, info.name); + } + + let adapter = adapters + .iter() + .find(|a| a.get_info().device_type == wgpu::DeviceType::DiscreteGpu) + .or_else(|| adapters.first())?; + + eprintln!("[sonar_wgpu] Selected adapter: {}", adapter.get_info().name); + let (device, queue) = pollster::block_on(adapter.request_device( + &wgpu::DeviceDescriptor { + label: Some("sonar_wgpu_device"), + required_features: wgpu::Features::empty(), + required_limits: wgpu::Limits::default(), + }, + None, + )) + .map_err(|e| { + eprintln!("[sonar_wgpu] request_device failed: {e}"); + e + }) + .ok()?; + + // One-time shader compilation. + let t0 = std::time::Instant::now(); + let (bs_bgl, mm_bgl, fft_bgl, convert_bgl) = build_bgls(&device); + let bs_pipeline = compile_pipeline( + &device, "bs_pipeline", + include_str!("shaders/backscatter.wgsl"), &bs_bgl, + ); + let convert_pipeline = compile_pipeline( + &device, "convert_pipeline", + include_str!("shaders/convert.wgsl"), &convert_bgl, + ); + let mm_pipeline = compile_pipeline( + &device, "mm_pipeline", + include_str!("shaders/matmul.wgsl"), &mm_bgl, + ); + let fft_pipeline = compile_pipeline( + &device, "fft_pipeline", + include_str!("shaders/fft.wgsl"), &fft_bgl, + ); + eprintln!("[sonar_wgpu] GPU pipelines compiled in {:.0} ms -> ready.", t0.elapsed().as_millis()); + + Some(GpuContext { device, queue, bs_pipeline, bs_bgl, convert_pipeline, convert_bgl, mm_pipeline, mm_bgl, fft_pipeline, fft_bgl, buffers: std::sync::Mutex::new(None) }) + }); + + match result { + Ok(ctx) => ctx, + Err(_) => { + eprintln!("[sonar_wgpu] GPU init panicked; falling back to CPU path"); + None + } + } +} + +/// Global singleton: lazily initializes GPU context on first access. +/// Returns None if GPU unavailable; falls back to CPU implementation. +pub fn get_or_init() -> Option<&'static GpuContext> { + GPU_CONTEXT.get_or_init(init_gpu_context).as_ref() +} + + +/* +Data flow with buffer names: + + CPU INPUT DATA + │ + ▼ + depth_buf [n_beams × n_rays] f32 COPY_DST | STORAGE + normal_buf [n_beams × n_rays × 3] f32 COPY_DST | STORAGE + refl_buf [n_beams × n_rays] f32 COPY_DST | STORAGE + window_buf [n_freq] f32 COPY_DST | STORAGE + bc_buf [n_beams × n_beams] f32 COPY_DST | STORAGE + │ + ▼ backscatter.wgsl (reads above, writes below) + │ + out_re_i32 [n_beams × n_freq] i32 COPY_DST | STORAGE ← atomic accumulators + out_im_i32 [n_beams × n_freq] i32 COPY_DST | STORAGE ← zeroed each frame + │ + ▼ convert.wgsl (i32 → f32) + │ + mm_re_in [n_beams × n_freq] f32 STORAGE + mm_im_in [n_beams × n_freq] f32 STORAGE + │ + ▼ matmul.wgsl (beam correction) + │ + mm_re_out [n_beams × n_freq] f32 STORAGE | COPY_SRC + mm_im_out [n_beams × n_freq] f32 STORAGE | COPY_SRC + │ + ▼ copied into FFT buffers + │ + p_re_buf [n_beams × n_freq] f32 STORAGE | COPY_SRC | COPY_DST + p_im_buf [n_beams × n_freq] f32 STORAGE | COPY_SRC | COPY_DST + │ + ▼ fft.wgsl (in-place FFT) + │ + ▼ copied to staging + │ + stg_re [n_beams × n_freq] f32 MAP_READ | COPY_DST + stg_im [n_beams × n_freq] f32 MAP_READ | COPY_DST + │ + ▼ CPU reads back final result + */ \ No newline at end of file diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/backscatter.wgsl b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/backscatter.wgsl new file mode 100644 index 00000000..a3a7a643 --- /dev/null +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/backscatter.wgsl @@ -0,0 +1,179 @@ +/// The physics kernel. + +struct Params { + n_beams: u32, + n_rays: u32, + n_freq: u32, + ray_skips: u32, + sound_speed: f32, + max_distance: f32, + source_level: f32, + attenuation: f32, + sensor_gain: f32, + bandwidth: f32, + seed_lo: u32, + seed_hi: u32, + frame_lo: u32, + frame_hi: u32, + area_scaler: f32, + h_fov: f32, + v_fov: f32, + _pad0: u32, + _pad1: u32, + _pad2: u32, +}; + +@group(0) @binding(0) var params: Params; +@group(0) @binding(1) var depth: array; +@group(0) @binding(2) var normals: array; +@group(0) @binding(3) var reflectivity: array; +@group(0) @binding(4) var out_re: array>; +@group(0) @binding(5) var out_im: array>; +@group(0) @binding(6) var window_vals: array; + +const PI: f32 = 3.14159265358979323846; +const SCALE: f32 = 1024.0; +const LIMIT: i32 = 2147483520i; // near i32 max + +// Philox4x32-10 RNG: WGPU/WGSL port of CUDA curand implementation. +// Based on Salmon et al., 2011, SC'11: "Random123: A Library of Counter-Based Random Number Generators". +// Based on https://github.com/DEShawResearch/random123/blob/main/include/Random123/philox.h +fn mulhilo32(a: u32, b: u32) -> vec2 { + let a_lo = a & 0xFFFFu; + let a_hi = a >> 16u; + let b_lo = b & 0xFFFFu; + let b_hi = b >> 16u; + let lo = a * b; + let p0 = a_lo * b_lo; + let p1 = a_hi * b_lo; + let p2 = a_lo * b_hi; + let p3 = a_hi * b_hi; + let mid = (p0 >> 16u) + (p1 & 0xFFFFu) + (p2 & 0xFFFFu); + let hi = p3 + (p1 >> 16u) + (p2 >> 16u) + (mid >> 16u); + return vec2(hi, lo); +} + +// Single Philox4x32 round. +fn philox_round(c: vec4, k: vec2) -> vec4 { + let r0 = mulhilo32(0xD2511F53u, c.x); + let r1 = mulhilo32(0xCD9E8D57u, c.z); + return vec4(r1.x ^ c.y ^ k.x, r1.y, r0.x ^ c.w ^ k.y, r0.y); +} + +// Full 10-round Philox4x32. +fn philox4x32_10(ctr: vec4, key: vec2) -> vec4 { + var c = ctr; + var k = key; + c = philox_round(c, k); k.x += 0x9E3779B9u; k.y += 0xBB67AE85u; + c = philox_round(c, k); k.x += 0x9E3779B9u; k.y += 0xBB67AE85u; + c = philox_round(c, k); k.x += 0x9E3779B9u; k.y += 0xBB67AE85u; + c = philox_round(c, k); k.x += 0x9E3779B9u; k.y += 0xBB67AE85u; + c = philox_round(c, k); k.x += 0x9E3779B9u; k.y += 0xBB67AE85u; + c = philox_round(c, k); k.x += 0x9E3779B9u; k.y += 0xBB67AE85u; + c = philox_round(c, k); k.x += 0x9E3779B9u; k.y += 0xBB67AE85u; + c = philox_round(c, k); k.x += 0x9E3779B9u; k.y += 0xBB67AE85u; + c = philox_round(c, k); k.x += 0x9E3779B9u; k.y += 0xBB67AE85u; + c = philox_round(c, k); + return c; +} + +// Convert raw u32 to uniform float (0,1]. +fn philox_uniform(x: u32) -> f32 { + return f32(x) * (1.0 / 4294967296.0) + (0.5 / 4294967296.0); +} + +// Generate 4 normal-distributed floats using Box-Muller transform. +// Counter: [frame_lo, frame_hi, subsequence, 0], Key: [seed_lo, seed_hi] +// Mirrors CUDA curand_init(seed, subsequence, offset=0) +fn philox_normal4(seed_lo: u32, seed_hi: u32, frame_lo: u32, frame_hi: u32, subsequence: u32) -> vec4 { + let raw = philox4x32_10(vec4(frame_lo, frame_hi, subsequence, 0u), vec2(seed_lo, seed_hi)); + let u1 = philox_uniform(raw.x); + let u2 = philox_uniform(raw.y); + let u3 = philox_uniform(raw.z); + let u4 = philox_uniform(raw.w); + let r1 = sqrt(-2.0 * log(u1)); + let t1 = 2.0 * PI * u2; + let r2 = sqrt(-2.0 * log(u3)); + let t2 = 2.0 * PI * u4; + return vec4(r1 * sin(t1), r1 * cos(t1), r2 * sin(t2), r2 * cos(t2)); +} + +@compute @workgroup_size(8, 8, 1) +fn main(@builtin(global_invocation_id) gid: vec3) { + let beam = gid.x; + let reduced_ray = gid.y; + if (beam >= params.n_beams) { + return; + } + + let ray_step = max(1u, params.ray_skips); + let actual_ray = reduced_ray * ray_step; + if (actual_ray >= params.n_rays) { + return; + } + + let dr = beam * params.n_rays + actual_ray; + let d = depth[dr]; + if (d <= 0.0 || d > params.max_distance) { + return; + } + + let nx = normals[dr * 3u + 0u]; + let ny = normals[dr * 3u + 1u]; + let nz = normals[dr * 3u + 2u]; + let refl = reflectivity[dr]; + + // Compute sensor-frame ray unit direction from (beam, actual_ray) pixel indices. + // Direction formula (sin(θ_b)cos(θ_r), sin(θ_r), cos(θ_b)cos(θ_r)) is unit-length by construction. + let beam_ang = params.h_fov * (f32(beam) / max(f32(params.n_beams) - 1.0, 1.0) - 0.5); + let ray_ang = params.v_fov * (f32(actual_ray) / max(f32(params.n_rays) - 1.0, 1.0) - 0.5); + let rd_x = sin(beam_ang) * cos(ray_ang); + let rd_y = sin(ray_ang); + let rd_z = cos(beam_ang) * cos(ray_ang); + // let cos_inc = abs(rd_x * nx + rd_y * ny + rd_z * nz); + let cos_inc = abs(nz); + let lambert_sqrt = sqrt(max(refl, 0.0)) * cos_inc; + + let source_term = sqrt(pow(10.0, params.source_level / 10.0)) * 1e-6; + let propagation = exp(-2.0 * params.attenuation * d) / (d * d); + let target_area_sqrt = sqrt(d * params.area_scaler); + // let amp_scale = source_term * propagation * lambert_sqrt * target_area_sqrt * params.sensor_gain; + let amp_scale = source_term * propagation * lambert_sqrt * target_area_sqrt; + + let subsequence = beam * params.n_rays + actual_ray; + let xi = philox_normal4(params.seed_lo, params.seed_hi, params.frame_lo, params.frame_hi, subsequence); + + let amp_re = (xi.x / sqrt(2.0)) * amp_scale; + let amp_im = (xi.y / sqrt(2.0)) * amp_scale; + + let delta_f = params.bandwidth / f32(params.n_freq); + let n_freq_f = f32(params.n_freq); + let is_even = (params.n_freq % 2u) == 0u; + + // Keep window buffer bound for interface stability. + _ = window_vals[0u]; + + // Per-frequency phase sweep + for (var f: u32 = 0u; f < params.n_freq; f = f + 1u) { + var freq: f32; + if (is_even) { + freq = delta_f * (-n_freq_f + 2.0 * (f32(f) + 1.0)) / 2.0; + } else { + freq = delta_f * (-(n_freq_f - 1.0) + 2.0 * (f32(f) + 1.0)) / 2.0; + } + + let kw = 2.0 * PI * freq / params.sound_speed; + let phase = 2.0 * d * kw; + let c = cos(phase); + let s = sin(phase); + + // let re_val = clamp((amp_re * c - amp_im * s) * SCALE, -LIMIT, LIMIT); + // let im_val = clamp((amp_re * s + amp_im * c) * SCALE, -LIMIT, LIMIT); + let re_val = clamp(i32(round((amp_re * c - amp_im * s) * SCALE)), -LIMIT, LIMIT); + let im_val = clamp(i32(round((amp_re * s + amp_im * c) * SCALE)), -LIMIT, LIMIT); + + let idx = beam * params.n_freq + f; + atomicAdd(&out_re[idx], i32(re_val)); + atomicAdd(&out_im[idx], i32(im_val)); + } +} diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/convert.wgsl b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/convert.wgsl new file mode 100644 index 00000000..afa4ba6f --- /dev/null +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/convert.wgsl @@ -0,0 +1,15 @@ +/// Fixed-point atomic accumulation in backscatter.wgsl (SCALE=1024) allows us +/// to avoid floating-point atomics, which are not widely supported on GPUs. This shader +/// converts the accumulated i32 values back to f32 by dividing by SCALE=1024, precision 0.001. +@group(0) @binding(0) var in_re: array; +@group(0) @binding(1) var in_im: array; +@group(0) @binding(2) var out_re: array; +@group(0) @binding(3) var out_im: array; + +@compute @workgroup_size(64) +fn main(@builtin(global_invocation_id) gid: vec3) { + let i = gid.x; + if (i >= arrayLength(&in_re)) { return; } + out_re[i] = f32(in_re[i]) / 1024.0; + out_im[i] = f32(in_im[i]) / 1024.0; +} diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/fft.wgsl b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/fft.wgsl new file mode 100644 index 00000000..7dec6e45 --- /dev/null +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/fft.wgsl @@ -0,0 +1,86 @@ +// Cooley-Tukey FFT (power-of-2 sizes only, N≤4096), or CPU fallback for arbitrary N. + +struct Params { + n_beams: u32, + n_freq: u32, + log2_n: u32, +}; + +@group(0) @binding(0) var params: Params; +@group(0) @binding(1) var p_re: array; +@group(0) @binding(2) var p_im: array; + +// WGSL writes to shared memory (smem_re/im) -> fast scratchpad local to workgroup. +var smem_re: array; +var smem_im: array; + +// Bit reversal for FFT permutation. +fn bit_reverse(v: u32, bits: u32) -> u32 { + var x = v; + var y: u32 = 0u; + for (var i: u32 = 0u; i < bits; i = i + 1u) { + y = (y << 1u) | (x & 1u); + x = x >> 1u; + } + return y; +} + +// Cooley-Tukey FFT kernel with bit-reversal, butterfly stages, and writeback. +@compute @workgroup_size(256, 1, 1) +fn main( + @builtin(workgroup_id) wg: vec3, // (wg) -> which beam this workgroup handles + @builtin(local_invocation_id) lid: vec3 // (lid) -> which thread within the workgroup (0..255) +) { + let beam = wg.x; + if (beam >= params.n_beams || params.n_freq > 4096u) { + return; + } + + let n = params.n_freq; + let base = beam * n; + + // Bit-reversal permutation. + for (var i: u32 = lid.x; i < n; i = i + 256u) { + let j = bit_reverse(i, params.log2_n); + smem_re[j] = p_re[base + i]; + smem_im[j] = p_im[base + i]; + } + workgroupBarrier(); + + // Cooley-Tukey butterfly iterations. + for (var s: u32 = 0u; s < params.log2_n; s = s + 1u) { + let half = 1u << s; + let stride = half << 1u; + + for (var i: u32 = lid.x; i < n / 2u; i = i + 256u) { + let group = i / half; + let j = i % half; + let i0 = group * stride + j; + let i1 = i0 + half; + + // Twiddle factor: root-of-unity exp(-j*2*pi*k/N). + let angle = -2.0 * 3.14159265358979323846 * f32(j) / f32(stride); + let wr = cos(angle); + let wi = sin(angle); + + // Butterfly combine: complex multiply (u ± w*t). + let tr = smem_re[i1] * wr - smem_im[i1] * wi; + let ti = smem_re[i1] * wi + smem_im[i1] * wr; + let ur = smem_re[i0]; + let ui = smem_im[i0]; + + smem_re[i0] = ur + tr; + smem_im[i0] = ui + ti; + smem_re[i1] = ur - tr; + smem_im[i1] = ui - ti; + } + + workgroupBarrier(); + } + + // Writeback to global storage. + for (var i: u32 = lid.x; i < n; i = i + 256u) { + p_re[base + i] = smem_re[i]; + p_im[base + i] = smem_im[i]; + } +} diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/matmul.wgsl b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/matmul.wgsl new file mode 100644 index 00000000..41f6e226 --- /dev/null +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/matmul.wgsl @@ -0,0 +1,71 @@ +// GPU GEMM: WGPU compute shader for beam correction via matrix multiply. +// Tiled approach: C = A*B normalized, with shared memory for cache optimization. + +struct Params { + n_beams: u32, + n_freq: u32, + beam_corrector_sum: f32, +}; + +@group(0) @binding(0) var params: Params; +@group(0) @binding(1) var a: array; // beam_corrector [nBeams*nBeams] +@group(0) @binding(2) var b: array; // spectrum input [nBeams*nFreq] +@group(0) @binding(3) var c: array; // output [nBeams*nFreq] + +// Shared memory tiles for GEMM optimization (tile size 16x16). +var tile_a: array, 16>; +var tile_b: array, 16>; + +// Tiled GEMM kernel: 16x16 workgroup, each thread computes one C[row,col] element. +@compute @workgroup_size(16, 16, 1) +fn main( + @builtin(global_invocation_id) gid: vec3, + @builtin(local_invocation_id) lid: vec3 +) { + let row = gid.y; + let col = gid.x; + if (row >= params.n_beams || col >= params.n_freq) { + return; + } + + // Accumulator: accumulates partial products across tiles. + var acc = 0.0; + let tiles = (params.n_beams + 15u) / 16u; + + // Loop over tiles of size 16: classic blocked GEMM. + // (k = beam index in the inner product) + for (var t: u32 = 0u; t < tiles; t = t + 1u) { + let k_a = t * 16u + lid.x; + let k_b = t * 16u + lid.y; + + // LOAD: load tile data into shared memory. + // Load A tile: each thread loads one A[row, k_a]. + if (k_a < params.n_beams) { + tile_a[lid.y][lid.x] = a[row * params.n_beams + k_a]; + } else { + tile_a[lid.y][lid.x] = 0.0; + } + + // Load B tile: each thread loads one B[k_b, col]. + if (k_b < params.n_beams) { + tile_b[lid.y][lid.x] = b[k_b * params.n_freq + col]; + } else { + tile_b[lid.y][lid.x] = 0.0; + } + + workgroupBarrier(); + + // COMPUTE: partial dot product. + // Accumulate tile product: inner loop over tile dimension. + for (var k: u32 = 0u; k < 16u; k = k + 1u) { + acc = acc + tile_a[lid.y][k] * tile_b[k][lid.x]; + } + + workgroupBarrier(); + } + + // WRITE: final result to global memory. + // Normalize output: divide by beam_corrector_sum. + let norm = max(params.beam_corrector_sum, 1e-12); + c[row * params.n_freq + col] = acc / norm; +} diff --git a/models/dave_sensor_models/description/blueview_p900/model.sdf b/models/dave_sensor_models/description/blueview_p900/model.sdf index dff61f43..01d96527 100644 --- a/models/dave_sensor_models/description/blueview_p900/model.sdf +++ b/models/dave_sensor_models/description/blueview_p900/model.sdf @@ -85,6 +85,8 @@ 12 900e3 29.9e3 + + 1500 220 10 From bbf0a1e59ce56aebf88a67818907deb451ca098b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 16 Mar 2026 20:44:52 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../multibeam_sonar/MultibeamSonarSensor.cc | 10 +++++----- gazebo/dave_gz_multibeam_sonar/wgpu_vendor/package.xml | 2 +- .../wgpu_vendor/sonar_wgpu_rust/src/lib.rs | 2 +- .../wgpu_vendor/sonar_wgpu_rust/src/pipeline.rs | 8 ++++---- .../sonar_wgpu_rust/src/shaders/convert.wgsl | 4 ++-- .../sonar_wgpu_rust/src/shaders/matmul.wgsl | 2 +- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/MultibeamSonarSensor.cc b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/MultibeamSonarSensor.cc index 27841226..866248ff 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/MultibeamSonarSensor.cc +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/MultibeamSonarSensor.cc @@ -419,7 +419,7 @@ bool MultibeamSonarSensor::Implementation::InitializeBeamArrangement(MultibeamSo // NOTE: frameName is the TF frame name for published sonar messages (frame_id in ROS headers). // This is explicitly configured in SDF rather than derived from Gazebo's // internal sensor frame ID. Must match TF tree for RViz transforms to work correctly. - // Verify in SDF files that matches the expected TF frame name. + // Verify in SDF files that matches the expected TF frame name. this->frameName = sensorElement->Get("frameName", "forward_sonar_optical_link").first; gzmsg << "frameName: " << this->frameName << std::endl; @@ -1085,7 +1085,7 @@ cv::Mat MultibeamSonarSensor::Implementation::ComputeNormalImage(cv::Mat & depth // Calculate focal length using the actual depth image dimensions (cols) so // this function is safe to call from a background thread without accessing - // the shared pointMsg field. Avoid data race with pointMsg.width() which may + // the shared pointMsg field. Avoid data race with pointMsg.width() which may // be being written to by the render thread concurrently. double focal_length = (0.5 * depth.cols) / tan(0.5 * this->hFOV); images.at(2) = 1.0 / focal_length * depth; // for blue channel @@ -1158,7 +1158,7 @@ void MultibeamSonarSensor::Implementation::ComputeSonarImage() } } // lock_ released -> render thread can proceed immediately. - + // NOTE: reflectivityImage is used here without a lock. // This is safe for now because only the compute thread writes to it // and the render thread only reads it. If the render thread ever starts modifying @@ -1303,10 +1303,10 @@ void MultibeamSonarSensor::Implementation::ComputeSonarImage() // FEEDBACK_NEEDED/TODO: Using frameName (from SDF tag) as the ROS frame_id. // This is user-friendly but requires the SDF value to match an actual TF frame exactly. - // The old approach used frameId (Gazebo's internal sensor ID) which was more likely + // The old approach used frameId (Gazebo's internal sensor ID) which was more likely // to match the TF tree automatically. // If TF lookup errors appear in RViz, shall revert back to frameId. - + this->sonarRawDataMsg.header.frame_id = this->frameName; this->sonarRawDataMsg.header.stamp.sec = static_cast(capturedStamp.seconds()); diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/package.xml b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/package.xml index 17f5eb07..1fdbb454 100644 --- a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/package.xml +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/package.xml @@ -9,4 +9,4 @@ ament_cmake - + \ No newline at end of file diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/lib.rs b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/lib.rs index a737df3f..b64f8f72 100644 --- a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/lib.rs +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/lib.rs @@ -328,7 +328,7 @@ pub extern "C" fn sonar_wgpu_compute( contents: bytemuck::bytes_of(&mm_params), usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, }); - + // REMOVE THIS, UNNCESSARY // ---- Persistent data buffers: allocate once per dimension set, reuse every frame ---- let mut buf_guard = ctx.buffers.lock().unwrap(); diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/pipeline.rs b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/pipeline.rs index 30d20e9a..1596a937 100644 --- a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/pipeline.rs +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/pipeline.rs @@ -165,7 +165,7 @@ pub struct GpuContext { pub fft_bgl: wgpu::BindGroupLayout, // SonarBuffers are None until first compute call when dimensions are known. - // Reallocated automatically if dimensions change between frames. + // Reallocated automatically if dimensions change between frames. // Persistent GPU buffers across frames: CUDA cudaMalloc equivalent (allocated once, reused across ≥1 frames with same dimensions). pub buffers: std::sync::Mutex>, } @@ -215,7 +215,7 @@ fn build_bgls(device: &wgpu::Device) -> ( make_storage_entry(1, true), // (depth) true = read_only make_storage_entry(2, true), // (normal) make_storage_entry(3, true), // (reflectivity) - make_storage_entry(4, false), // (out_re_i32) false = read_write + make_storage_entry(4, false), // (out_re_i32) false = read_write make_storage_entry(5, false), // (out_im_i32) make_storage_entry(6, true), // (window) ], @@ -388,7 +388,7 @@ Data flow with buffer names: │ ▼ depth_buf [n_beams × n_rays] f32 COPY_DST | STORAGE - normal_buf [n_beams × n_rays × 3] f32 COPY_DST | STORAGE + normal_buf [n_beams × n_rays × 3] f32 COPY_DST | STORAGE refl_buf [n_beams × n_rays] f32 COPY_DST | STORAGE window_buf [n_freq] f32 COPY_DST | STORAGE bc_buf [n_beams × n_beams] f32 COPY_DST | STORAGE @@ -421,4 +421,4 @@ Data flow with buffer names: stg_im [n_beams × n_freq] f32 MAP_READ | COPY_DST │ ▼ CPU reads back final result - */ \ No newline at end of file + */ \ No newline at end of file diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/convert.wgsl b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/convert.wgsl index afa4ba6f..84f66af6 100644 --- a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/convert.wgsl +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/convert.wgsl @@ -1,5 +1,5 @@ -/// Fixed-point atomic accumulation in backscatter.wgsl (SCALE=1024) allows us -/// to avoid floating-point atomics, which are not widely supported on GPUs. This shader +/// Fixed-point atomic accumulation in backscatter.wgsl (SCALE=1024) allows us +/// to avoid floating-point atomics, which are not widely supported on GPUs. This shader /// converts the accumulated i32 values back to f32 by dividing by SCALE=1024, precision 0.001. @group(0) @binding(0) var in_re: array; @group(0) @binding(1) var in_im: array; diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/matmul.wgsl b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/matmul.wgsl index 41f6e226..5df59bec 100644 --- a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/matmul.wgsl +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/matmul.wgsl @@ -45,7 +45,7 @@ fn main( } else { tile_a[lid.y][lid.x] = 0.0; } - + // Load B tile: each thread loads one B[k_b, col]. if (k_b < params.n_beams) { tile_b[lid.y][lid.x] = b[k_b * params.n_freq + col]; From e812a99de28b3cc967e90a28742d4bd9699a1266 Mon Sep 17 00:00:00 2001 From: Naitik Pahwa Date: Thu, 19 Mar 2026 00:43:05 +0530 Subject: [PATCH 3/4] add CUDA backend with runtime selection, rustfft migration Signed-off-by: Naitik Pahwa --- .../multibeam_sonar/CMakeLists.txt | 69 ++++-- .../multibeam_sonar/MultibeamSonarSensor.cc | 17 +- .../multibeam_sonar/MultibeamSonarSensor.hh | 3 +- .../multibeam_sonar/sonar_compute_cpu.cc | 43 +++- .../multibeam_sonar/sonar_compute_cuda.cc | 119 +++++++++++ .../multibeam_sonar/sonar_compute_cuda.hh | 26 +++ .../multibeam_sonar/sonar_compute_wgpu.cc | 4 +- .../launch/multibeam_sonar_demo.launch.py | 2 +- .../multibeam_sonar_demo/scripts/plotdata.py | 17 +- .../multibeam_sonar_system/CMakeLists.txt | 50 +++-- .../multibeam_sonar_system/package.xml | 4 +- .../wgpu_vendor/CMakeLists.txt | 13 +- .../wgpu_vendor/sonar_wgpu_rust/Cargo.lock | 58 +++++ .../wgpu_vendor/sonar_wgpu_rust/Cargo.toml | 1 + .../wgpu_vendor/sonar_wgpu_rust/src/fft.rs | 202 +++--------------- .../wgpu_vendor/sonar_wgpu_rust/src/lib.rs | 188 +++------------- .../sonar_wgpu_rust/src/pipeline.rs | 107 ++++++---- .../sonar_wgpu_rust/src/shaders/matmul.wgsl | 12 +- 18 files changed, 483 insertions(+), 452 deletions(-) create mode 100644 gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cuda.cc create mode 100644 gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cuda.hh diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/CMakeLists.txt b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/CMakeLists.txt index c02f0708..1849a906 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/CMakeLists.txt +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/CMakeLists.txt @@ -9,6 +9,9 @@ set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS TRUE CACHE BOOL cmake_policy(SET CMP0144 NEW) +set(CUDA_ARCHITECTURE "native" CACHE STRING "Target CUDA SM version(s), e.g. native or 70;80;89") +set(CMAKE_CUDA_ARCHITECTURES "${CUDA_ARCHITECTURE}") + project(multibeam_sonar) find_package(ament_cmake REQUIRED) @@ -25,6 +28,15 @@ find_package(OpenCV REQUIRED) find_package(marine_acoustic_msgs REQUIRED) find_package(cv_bridge REQUIRED) find_package(wgpu_vendor REQUIRED) +find_package(CUDAToolkit QUIET) + +if(CUDAToolkit_FOUND) + set(CMAKE_CUDA_COMPILER "${CUDAToolkit_NVCC_EXECUTABLE}" CACHE FILEPATH "CUDA compiler" FORCE) + enable_language(CUDA) + find_package(CUDA REQUIRED) + message(STATUS "CUDA ${CUDAToolkit_VERSION} found (SM ${CUDA_ARCHITECTURE}), enabling CUDA support.") + include_directories(${CUDA_INCLUDE_DIRS}) +endif() if(TARGET gz-rendering::ogre) add_definitions(-DWITH_OGRE) @@ -33,12 +45,27 @@ if(TARGET gz-rendering::ogre2) add_definitions(-DWITH_OGRE2) endif() -add_library(${PROJECT_NAME} SHARED +set(SONAR_SOURCES MultibeamSonarSensor.cc sonar_compute_cpu.cc sonar_compute_wgpu.cc + sonar_compute_cuda.cc ) +if(CUDAToolkit_FOUND) + list(APPEND SONAR_SOURCES sonar_calculation_cuda.cu) +endif() + +add_library(${PROJECT_NAME} SHARED ${SONAR_SOURCES}) + +if(CUDAToolkit_FOUND) + set_target_properties(${PROJECT_NAME} + PROPERTIES + CUDA_SEPARABLE_COMPILATION ON + ) + target_compile_definitions(${PROJECT_NAME} PRIVATE DAVE_HAS_CUDA_BACKEND=1) +endif() + target_include_directories(${PROJECT_NAME} PUBLIC ${gz-sensors_INCLUDE_DIRS} ${gz-rendering_INCLUDE_DIRS} @@ -53,12 +80,6 @@ target_include_directories(${PROJECT_NAME} PUBLIC ${cv_bridge_INCLUDE_DIRS} ) -if(DEFINED ENV{ROS_DISTRO} AND EXISTS "/opt/ros/$ENV{ROS_DISTRO}/include/cv_bridge") - target_include_directories(${PROJECT_NAME} PUBLIC "/opt/ros/$ENV{ROS_DISTRO}/include/cv_bridge") -elseif(EXISTS "/opt/ros/rolling/include/cv_bridge") - target_include_directories(${PROJECT_NAME} PUBLIC "/opt/ros/rolling/include/cv_bridge") -endif() - target_link_libraries(${PROJECT_NAME} gz-sim::gz-sim gz-sensors::gz-sensors @@ -72,6 +93,32 @@ target_link_libraries(${PROJECT_NAME} ${marine_acoustic_msgs_LIBRARIES} ) +if(TARGET cv_bridge::cv_bridge) + target_link_libraries(${PROJECT_NAME} cv_bridge::cv_bridge) +elseif(TARGET cv_bridge) + target_link_libraries(${PROJECT_NAME} cv_bridge) +elseif(cv_bridge_LIBRARIES) + target_link_libraries(${PROJECT_NAME} ${cv_bridge_LIBRARIES}) +endif() + +if(CUDAToolkit_FOUND) + find_library(CUBLAS_LIB cublas + HINTS + ${CUDAToolkit_LIBRARY_DIR} + /usr/local/cuda/lib64 + /usr/lib/x86_64-linux-gnu + /usr/lib + ) + if(NOT CUBLAS_LIB) + message(FATAL_ERROR "cuBLAS not found") + endif() + target_link_libraries(${PROJECT_NAME} + ${CUDA_LIBRARIES} + ${CUDA_CUFFT_LIBRARIES} + ${CUBLAS_LIB} + ) +endif() + if(TARGET wgpu::wgpu_vendor) target_link_libraries(${PROJECT_NAME} wgpu::wgpu_vendor) elseif(TARGET wgpu_vendor) @@ -89,14 +136,6 @@ if(DEFINED wgpu_vendor_DIR) endif() endif() -if(TARGET cv_bridge::cv_bridge) - target_link_libraries(${PROJECT_NAME} cv_bridge::cv_bridge) -elseif(TARGET cv_bridge) - target_link_libraries(${PROJECT_NAME} cv_bridge) -else() - target_link_libraries(${PROJECT_NAME} ${cv_bridge_LIBRARIES}) -endif() - if(TARGET gz-sensors::gz-sensors-rendering) target_link_libraries(${PROJECT_NAME} gz-sensors::gz-sensors-rendering) else() diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/MultibeamSonarSensor.cc b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/MultibeamSonarSensor.cc index 866248ff..a5e49e57 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/MultibeamSonarSensor.cc +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/MultibeamSonarSensor.cc @@ -427,10 +427,12 @@ bool MultibeamSonarSensor::Implementation::InitializeBeamArrangement(MultibeamSo if (const char * backendEnv = std::getenv("DAVE_SONAR_COMPUTE_BACKEND")) { this->requestedBackend = backendEnv; + gzmsg << "DAVE_SONAR_COMPUTE_BACKEND=" << this->requestedBackend << std::endl; } else { this->requestedBackend = "auto"; + gzmsg << "DAVE_SONAR_COMPUTE_BACKEND not set. Using default backend=auto" << std::endl; } this->computeBackend = CreateComputeBackend(this->requestedBackend); @@ -1107,14 +1109,19 @@ cv::Mat MultibeamSonarSensor::Implementation::ComputeNormalImage(cv::Mat & depth } // Precalculation of corrector sonar calculation -void MultibeamSonarSensor::Implementation::ComputeCorrector() +void MultibeamSonarSensor::Implementation::ComputeCorrector(int _snapshotWidth, int _nBeams) { - double hPixelSize = this->hFOV / (this->pointMsg.width() - 1); + if (_snapshotWidth <= 1 || _nBeams <= 0) + { + return; + } + + double hPixelSize = this->hFOV / (_snapshotWidth - 1); // Beam culling correction precalculation - for (size_t beam = 0; beam < this->nBeams; beam++) + for (int beam = 0; beam < _nBeams; beam++) { - for (size_t beam_other = 0; beam_other < this->nBeams; beam_other++) + for (int beam_other = 0; beam_other < _nBeams; beam_other++) { float azimuthBeamPattern = unnormalized_sinc( M_PI * 0.884 / hPixelSize * @@ -1148,7 +1155,7 @@ void MultibeamSonarSensor::Implementation::ComputeSonarImage() if (this->beamCorrectorSum == 0) { - ComputeCorrector(); + ComputeCorrector(depthSnapshot.cols, this->nBeams); } if (this->reflectivityImage.rows == 0) diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/MultibeamSonarSensor.hh b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/MultibeamSonarSensor.hh index 7e3b65d2..f8eb8539 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/MultibeamSonarSensor.hh +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/MultibeamSonarSensor.hh @@ -164,6 +164,7 @@ private: std::string sonarImageRawTopicName; std::string sonarImageTopicName; std::string frameName; + std::string frameId; // for non-optical frame id from sensor // Ray parameters int raySkips; @@ -216,7 +217,7 @@ private: const std::string & /*_format*/); void ComputeSonarImage(); cv::Mat ComputeNormalImage(cv::Mat & depth); - void ComputeCorrector(); + void ComputeCorrector(int _snapshotWidth, int _nBeams); // Connections gz::common::ConnectionPtr rayConnection; diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cpu.cc b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cpu.cc index b997c187..5290883c 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cpu.cc +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cpu.cc @@ -3,6 +3,7 @@ #include #include #include +#include // NOTE: This file serves two responsibilities -- it implements the CPU compute // backend (CpuComputeBackend) AND hosts the CreateComputeBackend() factory @@ -14,6 +15,7 @@ // so that the CPU and WGPU backends are fully decoupled and the factory is the // only translation unit that needs to know about both. #include "sonar_compute_wgpu.hh" +#include "sonar_compute_cuda.hh" namespace gz { @@ -146,6 +148,8 @@ class CpuComputeBackend : public ComputeBackend std::unique_ptr CreateComputeBackend(const std::string & requestedBackend) { + std::cerr << "[sonar_compute_factory] CreateComputeBackend called with: " << requestedBackend << std::endl; + std::string backend = requestedBackend; std::transform( backend.begin(), backend.end(), backend.begin(), @@ -153,25 +157,62 @@ std::unique_ptr CreateComputeBackend(const std::string & request if (backend == "wgpu") { + std::cerr << "[sonar_compute_factory] Attempting WgpuComputeBackend initialization..." << std::endl; auto wgpuBackend = std::make_unique(); SonarComputeInput probe; if (wgpuBackend->Initialize(probe)) { + std::cerr << "[sonar_compute_factory] SUCCESS: WgpuComputeBackend initialized!" << std::endl; return wgpuBackend; } + std::cerr << "[sonar_compute_factory] WGPU backend requested but FAILED to initialize." + << std::endl; return nullptr; } + if (backend == "cuda") + { + std::cerr << "[sonar_compute_factory] Attempting CudaComputeBackend initialization..." << std::endl; + auto cudaBackend = std::make_unique(); + SonarComputeInput probe; + if (cudaBackend->Initialize(probe)) + { + std::cerr << "[sonar_compute_factory] SUCCESS: CudaComputeBackend initialized!" << std::endl; + return cudaBackend; + } + std::cerr << "[sonar_compute_factory] CUDA backend requested but failed to initialize." << std::endl; + return nullptr; + } + + if (backend == "cpu") + { + std::cerr << "[sonar_compute_factory] Creating CPU backend" << std::endl; + return std::make_unique(); + } + if (backend == "auto") { - auto wgpuBackend = std::make_unique(); + std::cerr << "[sonar_compute_factory] Auto-selecting best available backend..." << std::endl; SonarComputeInput probe; + auto cudaBackend = std::make_unique(); + if (cudaBackend->Initialize(probe)) + { + std::cerr << "[sonar_compute_factory] SUCCESS: Auto-selected CUDA backend" << std::endl; + return cudaBackend; + } + + auto wgpuBackend = std::make_unique(); if (wgpuBackend->Initialize(probe)) { + std::cerr << "[sonar_compute_factory] SUCCESS: Auto-selected WGPU backend (CUDA unavailable)" + << std::endl; return wgpuBackend; } + std::cerr << "[sonar_compute_factory] SUCCESS: Auto-selected CPU backend (both CUDA and WGPU unavailable)" + << std::endl; } + std::cerr << "[sonar_compute_factory] Creating default CPU backend" << std::endl; return std::make_unique(); } diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cuda.cc b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cuda.cc new file mode 100644 index 00000000..8aea552e --- /dev/null +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cuda.cc @@ -0,0 +1,119 @@ +#include "sonar_compute_cuda.hh" + +#include +#include + +#ifdef DAVE_HAS_CUDA_BACKEND +#include +#include "sonar_calculation_cuda.cuh" +#endif + +namespace gz +{ +namespace sensors +{ + +CudaComputeBackend::~CudaComputeBackend() +{ +#ifdef DAVE_HAS_CUDA_BACKEND + if (this->initialized) + { + NpsGazeboSonar::free_cuda_memory(); + } +#endif +} + +const char * CudaComputeBackend::Name() const { return "cuda"; } + +bool CudaComputeBackend::Initialize(const SonarComputeInput &) +{ +#ifdef DAVE_HAS_CUDA_BACKEND + int deviceCount = 0; + const cudaError_t err = cudaGetDeviceCount(&deviceCount); + if (err != cudaSuccess || deviceCount <= 0) + { + return false; + } + + this->initialized = true; + return true; +#else + return false; +#endif +} + +bool CudaComputeBackend::Compute(const SonarComputeInput & input, SonarComputeOutput & output) +{ +#ifdef DAVE_HAS_CUDA_BACKEND + if (!this->initialized) + { + return false; + } + + if (!input.depthImage || !input.normalImage || !input.reflectivityImage || + !input.window || !input.beamCorrector || input.elevation_angles.empty()) + { + return false; + } + + auto start = std::chrono::high_resolution_clock::now(); + + const double hPixelSize = input.hFOV / static_cast(input.nBeams - 1); + const double vPixelSize = input.vFOV / static_cast(input.nRays - 1); + + const NpsGazeboSonar::CArray2D beams = NpsGazeboSonar::sonar_calculation_wrapper( + *input.depthImage, + *input.normalImage, + hPixelSize, + vPixelSize, + input.hFOV, + input.vFOV, + hPixelSize, + input.vFOV / 180.0 * M_PI, + hPixelSize, + const_cast(input.elevation_angles.data()), + vPixelSize * (input.raySkips + 1), + input.soundSpeed, + input.maxDistance, + input.sourceLevel, + input.nBeams, + input.nRays, + input.raySkips, + input.sonarFreq, + input.bandwidth, + input.nFreq, + *input.reflectivityImage, + input.attenuation, + const_cast(input.window), + input.beamCorrector, + input.beamCorrectorSum, + false, + input.blazingFlag); + + output.nBeams = input.nBeams; + output.nFreq = input.nFreq; + output.beamSpectrum.assign( + static_cast(input.nBeams) * static_cast(input.nFreq), + std::complex(0.0f, 0.0f)); + + for (int beam = 0; beam < input.nBeams; ++beam) + { + for (int freq = 0; freq < input.nFreq; ++freq) + { + output.At(beam, freq) = beams[beam][freq]; + } + } + + auto stop = std::chrono::high_resolution_clock::now(); + output.computeMicros = + std::chrono::duration_cast(stop - start).count(); + return true; +#else + static_cast(input); + static_cast(output); + return false; +#endif +} + +} // namespace sensors +} // namespace gz diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cuda.hh b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cuda.hh new file mode 100644 index 00000000..7dfdb47f --- /dev/null +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cuda.hh @@ -0,0 +1,26 @@ +#pragma once + +#include "sonar_compute_backend.hh" + +namespace gz +{ +namespace sensors +{ + +class CudaComputeBackend : public ComputeBackend +{ +public: + ~CudaComputeBackend() override; + + const char * Name() const override; + + bool Initialize(const SonarComputeInput & prototype) override; + + bool Compute(const SonarComputeInput & input, SonarComputeOutput & output) override; + +private: + bool initialized{false}; +}; + +} // namespace sensors +} // namespace gz diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_wgpu.cc b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_wgpu.cc index 698b1fd1..59cd9ed8 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_wgpu.cc +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_wgpu.cc @@ -68,6 +68,8 @@ bool WgpuComputeBackend::Compute(const SonarComputeInput & input, SonarComputeOu { if (!this->cpuFallback) { + std::cerr << "[sonar_wgpu] GPU unavailable on first compute -> creating CPU fallback backend" + << std::endl; this->cpuFallback = CreateComputeBackend("cpu"); if (this->cpuFallback) { @@ -112,7 +114,7 @@ bool WgpuComputeBackend::Compute(const SonarComputeInput & input, SonarComputeOu const float hPixelSize = static_cast(input.hFOV) / std::max(1, nBeams - 1); const float vPixelSize = static_cast(input.vFOV) / std::max(1, nRays - 1); - const int raySkipsFactor = std::max(1, input.raySkips); + const int raySkipsFactor = input.raySkips + 1; const float area_scaler = hPixelSize * vPixelSize * raySkipsFactor; // This transposes from OpenCV's [row=ray][col=beam] to [beam][ray] for WGPU. diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/launch/multibeam_sonar_demo.launch.py b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/launch/multibeam_sonar_demo.launch.py index c8f1aadc..c4bf519d 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/launch/multibeam_sonar_demo.launch.py +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/launch/multibeam_sonar_demo.launch.py @@ -62,7 +62,7 @@ def generate_launch_description(): DeclareLaunchArgument( "compute_backend", default_value="auto", - description="Sonar backend selection: auto|wgpu|cpu", + description="Sonar backend selection: auto|wgpu|cuda|cpu", ), DeclareLaunchArgument("rviz", default_value="true", description="Open RViz."), SetEnvironmentVariable("DAVE_SONAR_COMPUTE_BACKEND", compute_backend), diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/scripts/plotdata.py b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/scripts/plotdata.py index 675d62b5..f3ad4248 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/scripts/plotdata.py +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_demo/scripts/plotdata.py @@ -12,21 +12,8 @@ maxRange = 5 xPlotRange = 10 yPlotRange = xPlotRange * np.cos(45 * np.pi / 180) -source_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")) -inferred_root = source_root -candidates = [ - os.environ.get("DAVE_SOURCE_ROOT", ""), - os.getcwd(), - inferred_root, - os.path.join(inferred_root, "src", "dave"), -] -source_root = inferred_root -for candidate in candidates: - if candidate and os.path.exists(os.path.join(candidate, "models", "dave_worlds", "worlds")): - source_root = candidate - break - -results_dir = os.path.join(source_root, "results") + +results_dir = os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "results") os.makedirs(results_dir, exist_ok=True) filename = os.path.join(results_dir, "SonarRawData_000001.csv") diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_system/CMakeLists.txt b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_system/CMakeLists.txt index cf18b197..3787c76d 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_system/CMakeLists.txt +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_system/CMakeLists.txt @@ -2,23 +2,26 @@ cmake_minimum_required(VERSION 3.11.0 FATAL_ERROR) project(multibeam_sonar_system) find_package(ament_cmake REQUIRED) -find_package(gz-common REQUIRED COMPONENTS profiler) -find_package(gz-plugin REQUIRED COMPONENTS register) -find_package(gz-rendering REQUIRED OPTIONAL_COMPONENTS ogre ogre2) -find_package(gz-sim REQUIRED) -find_package(gz-sensors REQUIRED) - -if(TARGET gz-rendering::ogre) - add_definitions(-DWITH_OGRE) -endif() +find_package(CUDAToolkit QUIET) -if(TARGET gz-rendering::ogre2) - add_definitions(-DWITH_OGRE2) -endif() +if(CUDAToolkit_FOUND) + find_package(gz-cmake REQUIRED) + find_package(gz-common REQUIRED COMPONENTS profiler) + find_package(gz-plugin REQUIRED COMPONENTS register) + find_package(gz-rendering REQUIRED OPTIONAL_COMPONENTS ogre ogre2) + find_package(gz-sim REQUIRED) + find_package(gz-sensors REQUIRED) + + if(TARGET gz-rendering::ogre) + add_definitions(-DWITH_OGRE) + endif() -add_subdirectory(../multibeam_sonar ${CMAKE_BINARY_DIR}/multibeam_sonar) + if(TARGET gz-rendering::ogre2) + add_definitions(-DWITH_OGRE2) + endif() + + add_subdirectory(../multibeam_sonar ${CMAKE_BINARY_DIR}/multibeam_sonar) -if(TARGET multibeam_sonar) add_library(${PROJECT_NAME} SHARED MultibeamSonarSystem.cc) target_link_libraries(${PROJECT_NAME} gz-common::gz-common @@ -27,12 +30,23 @@ if(TARGET multibeam_sonar) gz-sensors::gz-sensors multibeam_sonar ) - target_include_directories(${PROJECT_NAME} PUBLIC ../multibeam_sonar) + target_include_directories(${PROJECT_NAME} + PUBLIC + ../multibeam_sonar + ) + + # Install targets + install(TARGETS ${PROJECT_NAME} + DESTINATION lib/${PROJECT_NAME} + ) + + # Environment hooks + ament_environment_hooks( + "${CMAKE_CURRENT_SOURCE_DIR}/hooks/${PROJECT_NAME}.dsv.in" + ) - install(TARGETS ${PROJECT_NAME} DESTINATION lib/${PROJECT_NAME}) - ament_environment_hooks("${CMAKE_CURRENT_SOURCE_DIR}/hooks/${PROJECT_NAME}.dsv.in") else() - message(STATUS "multibeam_sonar target unavailable: Skipping multibeam_sonar_system") + message(STATUS "CUDA Toolkit not found: Skipping CUDA-specific targets") endif() ament_package() diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_system/package.xml b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_system/package.xml index 6f921a21..352de2b5 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_system/package.xml +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar_system/package.xml @@ -8,10 +8,8 @@ Gaurav kumar Apache 2.0 geometry_msgs - ament_cmake - multibeam_sonar + ament_cmake ament_lint_auto - ament_lint_common ament_cmake diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/CMakeLists.txt b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/CMakeLists.txt index 511b06e1..3a28cf79 100644 --- a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/CMakeLists.txt +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/CMakeLists.txt @@ -2,7 +2,9 @@ cmake_minimum_required(VERSION 3.11) project(wgpu_vendor) find_package(ament_cmake REQUIRED) -find_package(Vulkan REQUIRED) + +# WGPU can use Metal, DirectX, or Vulkan +find_package(Vulkan QUIET) find_program(CARGO_EXECUTABLE cargo REQUIRED) @@ -38,7 +40,10 @@ add_dependencies(sonar_wgpu_rust_static sonar_wgpu_rust) add_library(wgpu_vendor INTERFACE) add_dependencies(wgpu_vendor sonar_wgpu_rust_static) -target_link_libraries(wgpu_vendor INTERFACE sonar_wgpu_rust_static dl pthread m Vulkan::Vulkan) +target_link_libraries(wgpu_vendor INTERFACE sonar_wgpu_rust_static dl pthread m) +if(Vulkan_FOUND) + target_link_libraries(wgpu_vendor INTERFACE Vulkan::Vulkan) +endif() add_library(wgpu::wgpu_vendor ALIAS wgpu_vendor) @@ -51,5 +56,7 @@ install(EXPORT wgpu_vendorTargets DESTINATION share/${PROJECT_NAME}/cmake) ament_export_targets(wgpu_vendorTargets HAS_LIBRARY_TARGET) -ament_export_dependencies(Vulkan) +if(Vulkan_FOUND) + ament_export_dependencies(Vulkan) +endif() ament_package() diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/Cargo.lock b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/Cargo.lock index 9e8ac211..9e3c585d 100644 --- a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/Cargo.lock +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/Cargo.lock @@ -549,6 +549,24 @@ dependencies = [ "jni-sys", ] +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -626,6 +644,15 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8cf8e6a8aa66ce33f63993ffc4ea4271eb5b0530a9002db8455ea6050c77bfa" +[[package]] +name = "primal-check" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc0d895b311e3af9902528fbb8f928688abbd95872819320517cc24ca6b2bd08" +dependencies = [ + "num-integer", +] + [[package]] name = "proc-macro2" version = "1.0.106" @@ -703,6 +730,20 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustfft" +version = "6.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21db5f9893e91f41798c88680037dba611ca6674703c1a18601b01a72c8adb89" +dependencies = [ + "num-complex", + "num-integer", + "num-traits", + "primal-check", + "strength_reduce", + "transpose", +] + [[package]] name = "rustversion" version = "1.0.22" @@ -743,6 +784,7 @@ dependencies = [ "bytemuck", "pollster", "rayon", + "rustfft", "wgpu", ] @@ -761,6 +803,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "strength_reduce" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" + [[package]] name = "syn" version = "1.0.109" @@ -812,6 +860,16 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "transpose" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad61aed86bc3faea4300c7aee358b4c6d0c8d6ccc36524c96e4c92ccf26e77e" +dependencies = [ + "num-integer", + "strength_reduce", +] + [[package]] name = "unicode-ident" version = "1.0.24" diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/Cargo.toml b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/Cargo.toml index e09be284..6d8fc301 100644 --- a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/Cargo.toml +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/Cargo.toml @@ -11,6 +11,7 @@ wgpu = { version = "0.20" } bytemuck = { version = "1", features = ["derive"] } pollster = "0.3" rayon = "1" +rustfft = "6" [profile.release] opt-level = 3 diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/fft.rs b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/fft.rs index 1bfe5103..ad40dfc8 100644 --- a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/fft.rs +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/fft.rs @@ -1,164 +1,29 @@ -// Cooley-Tukey FFT and Bluestein's chirp-Z transform for non-power-of-two sizes +// CPU FFT using rustfft for both power-of-two and arbitrary lengths. -/// CPU FFT: WGPU port of CUDA cuFFT. -/// Power-of-2 N: Cooley-Tukey O(N log N). Arbitrary N: Bluestein chirp-Z O(N log M). -/// CPU used for validation and CUDA non-power-of-2 fallback (GPU shader: power-of-2 only, N <= 4096). use crate::ComplexF; use rayon::prelude::*; +use rustfft::{num_complex::Complex, FftPlanner}; -fn is_power_of_two(x: usize) -> bool { - x != 0 && (x & (x - 1)) == 0 -} - -fn bit_reverse(mut x: usize, bits: usize) -> usize { - let mut y = 0usize; - for _ in 0..bits { - y = (y << 1) | (x & 1); - x >>= 1; - } - y -} - -/// Cooley-Tukey FFT: CUDA cuFFT Cooley-Tukey algorithm (CPU reference). -/// Requires N = power of 2. Use bluestein_fft() for arbitrary N (CUDA extension). -fn fft_in_place(data: &mut [ComplexF]) { - let n = data.len(); - if n <= 1 { - return; - } - - let bits = (n as f32).log2() as usize; - for i in 0..n { - let j = bit_reverse(i, bits); - if j > i { - data.swap(i, j); - } - } - - let mut len = 2usize; - while len <= n { - let ang = -2.0f32 * std::f32::consts::PI / len as f32; - let wlen_re = ang.cos(); - let wlen_im = ang.sin(); - - let mut i = 0usize; - while i < n { - let mut w_re = 1.0f32; - let mut w_im = 0.0f32; - let half = len / 2; - for j in 0..half { - let u = data[i + j]; - let t = data[i + j + half]; - - let v_re = t.re * w_re - t.im * w_im; - let v_im = t.re * w_im + t.im * w_re; - - data[i + j] = ComplexF { - re: u.re + v_re, - im: u.im + v_im, - }; - data[i + j + half] = ComplexF { - re: u.re - v_re, - im: u.im - v_im, - }; - - let nw_re = w_re * wlen_re - w_im * wlen_im; - let nw_im = w_re * wlen_im + w_im * wlen_re; - w_re = nw_re; - w_im = nw_im; - } - i += len; - } - len <<= 1; - } -} - -/// Inverse FFT: Matches CUDA cuFFT conjugate-symmetry trick. IFFT(x) = conj(FFT(conj(x))) / N. -/// Requires power-of-2 N matching forward FFT (CUDA cufftExecC2C standard). -fn ifft_in_place(data: &mut [ComplexF]) { - let n = data.len(); - for x in data.iter_mut() { - x.im = -x.im; - } - fft_in_place(data); - let inv = 1.0f32 / n as f32; - for x in data.iter_mut() { - x.re *= inv; - x.im = -x.im * inv; - } -} - -/// Bluestein chirp-Z: Non-CUDA extension for arbitrary N DFT. O(N log M) where M = smallest 2^k >= 2N-1. -/// ~30-50x faster than naive O(N²) DFT. CUDA cuFFT only supports power-of-2; this is CPU addon. -fn bluestein_fft(data: &mut [ComplexF]) { - let n = data.len(); - if n <= 1 { - return; - } - - // M = smallest power-of-two >= 2*n - 1 (ensures linear convolution fits) - let mut m = 1usize; - while m < 2 * n - 1 { - m <<= 1; - } - - // Chirp sequence: w[k] = exp(-j*pi*k^2/n) - // stored as (cos, sin) where the complex value is (cos, -sin) = cos - j*sin - let chirp: Vec<(f32, f32)> = (0..n) - .map(|k| { - let theta = std::f32::consts::PI * (k * k) as f32 / n as f32; - (theta.cos(), theta.sin()) // (cos θ, sin θ) so chirp = cos - j·sin - }) - .collect(); - - // a[k] = data[k] * chirp[k] = data[k] * exp(-j*pi*k^2/n), zero-padded to M - let mut a = vec![ComplexF::default(); m]; - for k in 0..n { - let (c, s) = chirp[k]; // chirp[k] = (c, -s) as complex - a[k].re = data[k].re * c + data[k].im * s; // Re[(a+jb)(c-js)] = ac+bs - a[k].im = data[k].im * c - data[k].re * s; // Im[(a+jb)(c-js)] = bc-as - } - - // h[k] = conj(chirp[k]) = exp(+j*pi*k^2/n), with wrap-around: h[M-k] = h[k] - let mut h = vec![ComplexF::default(); m]; - for k in 0..n { - let (c, s) = chirp[k]; - h[k] = ComplexF { re: c, im: s }; // conj(chirp[k]) = cos + j*sin - if k > 0 { - h[m - k] = h[k]; // symmetric fill for circular convolution - } - } - - // Circular convolution via FFT: Y = IFFT(FFT(a) * FFT(h)) - fft_in_place(&mut a); - fft_in_place(&mut h); - for i in 0..m { - let re = a[i].re * h[i].re - a[i].im * h[i].im; - let im = a[i].re * h[i].im + a[i].im * h[i].re; - a[i] = ComplexF { re, im }; - } - ifft_in_place(&mut a); - - // X[k] = a[k] * chirp[k] = a[k] * exp(-j*pi*k^2/n) - for k in 0..n { - let (c, s) = chirp[k]; - data[k].re = a[k].re * c + a[k].im * s; - data[k].im = a[k].im * c - a[k].re * s; - } -} - -/// Batched FFT: CPU dispatcher matching CUDA cuFFT batching. Routes to GPU (power-of-2 && <= 4096) or CPU fallback. +/// Batched FFT preserving the existing public API used by lib.rs. pub fn fft_batched(input: &[ComplexF], n_beams: usize, n_freq: usize) -> Vec { - // Collect into per-beam rows, transform in parallel, then flatten back. let mut rows: Vec> = (0..n_beams) .map(|b| input[b * n_freq..(b + 1) * n_freq].to_vec()) .collect(); + let mut planner = FftPlanner::::new(); + let fft = planner.plan_fft_forward(n_freq); + rows.par_iter_mut().for_each(|row| { - if is_power_of_two(row.len()) { - fft_in_place(row); - } else { - bluestein_fft(row); + let mut rustfft_row: Vec> = row + .iter() + .map(|x| Complex:: { re: x.re, im: x.im }) + .collect(); + + fft.process(&mut rustfft_row); + + for (dst, src) in row.iter_mut().zip(rustfft_row.iter()) { + dst.re = src.re; + dst.im = src.im; } }); @@ -196,14 +61,14 @@ mod tests { } #[test] - fn bluestein_matches_naive_dft_n3() { - let mut x_b = vec![ + fn rustfft_matches_naive_dft_n3() { + let x = vec![ ComplexF { re: 1.0, im: 0.0 }, ComplexF { re: 2.0, im: -1.0 }, ComplexF { re: 0.5, im: 3.0 }, ]; - let mut x_d = x_b.clone(); - bluestein_fft(&mut x_b); + let x_b = fft_batched(&x, 1, 3); + let mut x_d = x.clone(); naive_dft(&mut x_d); for (b, d) in x_b.iter().zip(x_d.iter()) { assert!(nearly_eq(b.re, d.re), "re: {} vs {}", b.re, d.re); @@ -212,7 +77,7 @@ mod tests { } #[test] - fn bluestein_matches_naive_dft_n399() { + fn rustfft_matches_naive_dft_n399() { use std::f32::consts::PI; let n = 399usize; // Construct a chirp-like test signal @@ -222,9 +87,8 @@ mod tests { im: ((k as f32) * PI / 75.0).sin(), }) .collect(); - let mut x_b = input.clone(); + let x_b = fft_batched(&input, 1, n); let mut x_d = input.clone(); - bluestein_fft(&mut x_b); naive_dft(&mut x_d); // Compare first 5 and last 5 bins for &i in &[0, 1, 2, 3, 4, 394, 395, 396, 397, 398] { @@ -236,33 +100,15 @@ mod tests { } #[test] - fn bluestein_impulse_gives_constant_spectrum() { + fn rustfft_impulse_gives_constant_spectrum() { // DFT of [1, 0, 0, ..., 0] = [1, 1, 1, ..., 1] let n = 399usize; let mut x = vec![ComplexF::default(); n]; x[0].re = 1.0; - bluestein_fft(&mut x); + let x = fft_batched(&x, 1, n); for (i, v) in x.iter().enumerate() { assert!(nearly_eq(v.re, 1.0), "bin {i} re = {} (expected 1)", v.re); assert!(nearly_eq(v.im, 0.0), "bin {i} im = {} (expected 0)", v.im); } } } - - -/* - -X[k] = Σ x[n] · e^(-j·2π·kn/N) - - = Σ x[n] · e^(-jπ(k² + n² - (k-n)²)/N) - - = e^(-jπk²/N) · Σ [x[n] · e^(-jπn²/N)] · e^(+jπ(k-n)²/N) - ─────────── ───────────────────── ──────────────── - post-chirp a[n] (pre-chirped) h[k-n] (filter) - -So the full Bluestein algorithm is: - 1. a[n] = x[n] · e^(-jπn²/N) premultiply by chirp - 2. Y[k] = (a ★ h)[k] convolve with conjugate chirp filter - 3. X[k] = Y[k] · e^(-jπk²/N) postmultiply by chirp - -*/ \ No newline at end of file diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/lib.rs b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/lib.rs index b64f8f72..9b4e5c81 100644 --- a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/lib.rs +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/lib.rs @@ -59,165 +59,6 @@ struct FftParams { log2_n: u32, } -// REMOVE THIS, UNNCESSARY -/// Single-buffer i32 readback kept for CPU-fallback / test paths. -#[allow(dead_code)] -fn readback_i32( - device: &wgpu::Device, - queue: &wgpu::Queue, - src: &wgpu::Buffer, - len: usize, -) -> Option> { - let bytes = (len * std::mem::size_of::()) as u64; - let staging = device.create_buffer(&wgpu::BufferDescriptor { - label: Some("readback_i32"), - size: bytes, - usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, - mapped_at_creation: false, - }); - let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { - label: Some("readback_i32_encoder"), - }); - encoder.copy_buffer_to_buffer(src, 0, &staging, 0, bytes); - queue.submit(std::iter::once(encoder.finish())); - device.poll(wgpu::Maintain::Wait); - let slice = staging.slice(..); - slice.map_async(wgpu::MapMode::Read, |_| {}); - device.poll(wgpu::Maintain::Wait); - let data = slice.get_mapped_range(); - let out = bytemuck::cast_slice::(&data).to_vec(); - drop(data); - staging.unmap(); - Some(out) -} - -#[allow(dead_code)] -/// Read two i32 buffers back to CPU in **one** encoder+submit+poll pair. -/// Returns (re_vec, im_vec). -fn readback_i32_pair( - device: &wgpu::Device, - queue: &wgpu::Queue, - src_re: &wgpu::Buffer, - src_im: &wgpu::Buffer, - len: usize, -) -> Option<(Vec, Vec)> { - let bytes = (len * std::mem::size_of::()) as u64; - let stg_re = device.create_buffer(&wgpu::BufferDescriptor { - label: Some("stg_re"), - size: bytes, - usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, - mapped_at_creation: false, - }); - let stg_im = device.create_buffer(&wgpu::BufferDescriptor { - label: Some("stg_im"), - size: bytes, - usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, - mapped_at_creation: false, - }); - let mut enc = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { - label: Some("readback_i32_pair"), - }); - enc.copy_buffer_to_buffer(src_re, 0, &stg_re, 0, bytes); - enc.copy_buffer_to_buffer(src_im, 0, &stg_im, 0, bytes); - queue.submit(std::iter::once(enc.finish())); - device.poll(wgpu::Maintain::Wait); - - stg_re.slice(..).map_async(wgpu::MapMode::Read, |_| {}); - stg_im.slice(..).map_async(wgpu::MapMode::Read, |_| {}); - device.poll(wgpu::Maintain::Wait); - - let re_data = stg_re.slice(..).get_mapped_range(); - let im_data = stg_im.slice(..).get_mapped_range(); - let re = bytemuck::cast_slice::(&re_data).to_vec(); - let im = bytemuck::cast_slice::(&im_data).to_vec(); - drop(re_data); - drop(im_data); - stg_re.unmap(); - stg_im.unmap(); - Some((re, im)) -} - -// REMOVE THIS, UNNCESSARY -#[allow(dead_code)] -/// Read two f32 buffers back to CPU in **one** encoder+submit+poll pair. -fn readback_f32_pair( - device: &wgpu::Device, - queue: &wgpu::Queue, - src_re: &wgpu::Buffer, - src_im: &wgpu::Buffer, - len: usize, -) -> Option<(Vec, Vec)> { - let bytes = (len * std::mem::size_of::()) as u64; - let stg_re = device.create_buffer(&wgpu::BufferDescriptor { - label: Some("stg_re_f32"), - size: bytes, - usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, - mapped_at_creation: false, - }); - let stg_im = device.create_buffer(&wgpu::BufferDescriptor { - label: Some("stg_im_f32"), - size: bytes, - usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, - mapped_at_creation: false, - }); - let mut enc = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { - label: Some("readback_f32_pair"), - }); - enc.copy_buffer_to_buffer(src_re, 0, &stg_re, 0, bytes); - enc.copy_buffer_to_buffer(src_im, 0, &stg_im, 0, bytes); - queue.submit(std::iter::once(enc.finish())); - device.poll(wgpu::Maintain::Wait); - - stg_re.slice(..).map_async(wgpu::MapMode::Read, |_| {}); - stg_im.slice(..).map_async(wgpu::MapMode::Read, |_| {}); - device.poll(wgpu::Maintain::Wait); - - let re_data = stg_re.slice(..).get_mapped_range(); - let im_data = stg_im.slice(..).get_mapped_range(); - let re = bytemuck::cast_slice::(&re_data).to_vec(); - let im = bytemuck::cast_slice::(&im_data).to_vec(); - drop(re_data); - drop(im_data); - stg_re.unmap(); - stg_im.unmap(); - Some((re, im)) -} - -// REMOVE THIS, UNNCESSARY -/// Single-buffer f32 readback kept for CPU-fallback / test paths. -#[allow(dead_code)] -fn readback_f32( - device: &wgpu::Device, - queue: &wgpu::Queue, - src: &wgpu::Buffer, - len: usize, -) -> Option> { - let bytes = (len * std::mem::size_of::()) as u64; - let staging = device.create_buffer(&wgpu::BufferDescriptor { - label: Some("readback_f32"), - size: bytes, - usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, - mapped_at_creation: false, - }); - - let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { - label: Some("readback_f32_encoder"), - }); - encoder.copy_buffer_to_buffer(src, 0, &staging, 0, bytes); - queue.submit(std::iter::once(encoder.finish())); - device.poll(wgpu::Maintain::Wait); - - let slice = staging.slice(..); - slice.map_async(wgpu::MapMode::Read, |_| {}); - device.poll(wgpu::Maintain::Wait); - - let data = slice.get_mapped_range(); - let out = bytemuck::cast_slice::(&data).to_vec(); - drop(data); - staging.unmap(); - Some(out) -} - /// The function receives flat arrays from C++ and returns a heap-allocated array. #[no_mangle] pub extern "C" fn sonar_wgpu_compute( @@ -504,10 +345,35 @@ pub extern "C" fn sonar_wgpu_compute( // map_async is asynchronous -> it requests CPU access to the buffer but doesn't block. // Map staging buffers (GPU work already done above) -> POLL 2 - buf.stg_re.slice(..).map_async(wgpu::MapMode::Read, |_| {}); - buf.stg_im.slice(..).map_async(wgpu::MapMode::Read, |_| {}); + let re_ok = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)); + let im_ok = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)); + let re_ok_cb = std::sync::Arc::clone(&re_ok); + let im_ok_cb = std::sync::Arc::clone(&im_ok); + + buf.stg_re + .slice(..) + .map_async(wgpu::MapMode::Read, move |r| { + re_ok_cb.store(r.is_ok(), std::sync::atomic::Ordering::Release); + }); + buf.stg_im + .slice(..) + .map_async(wgpu::MapMode::Read, move |r| { + im_ok_cb.store(r.is_ok(), std::sync::atomic::Ordering::Release); + }); device.poll(wgpu::Maintain::Wait); + let re_mapped = re_ok.load(std::sync::atomic::Ordering::Acquire); + let im_mapped = im_ok.load(std::sync::atomic::Ordering::Acquire); + if !re_mapped || !im_mapped { + eprintln!( + "[sonar_wgpu] map_async failed: stg_re_ok={}, stg_im_ok={}", + re_mapped, im_mapped + ); + buf.stg_re.unmap(); + buf.stg_im.unmap(); + return std::ptr::null_mut(); + } + let (mut p_re, mut p_im) = { let re_data = buf.stg_re.slice(..).get_mapped_range(); let im_data = buf.stg_im.slice(..).get_mapped_range(); diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/pipeline.rs b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/pipeline.rs index 1596a937..8fafdcdd 100644 --- a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/pipeline.rs +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/pipeline.rs @@ -281,52 +281,44 @@ fn compile_pipeline( }) } -/// GPU context initialization: Vulkan adapter enumeration, device request. -fn init_gpu_context() -> Option { - // TODO: These environment variable overrides are NVIDIA/Vulkan-specific (Linux only). - // Replace with wgpu::Backends::all() so the adapter selection works automatically - // on any platform (Metal on macOS, DX12 on Windows, Vulkan on Linux) without - // needing manual ICD path configuration. - if std::env::var("VK_ICD_FILENAMES").is_err() { - unsafe { - std::env::set_var("VK_ICD_FILENAMES", "/usr/share/vulkan/icd.d/nvidia_icd.json"); - } - } - if std::env::var("VK_LAYER_PATH").is_err() { - unsafe { - std::env::set_var("DISABLE_LAYER_NV_optimus", "1"); - } - } - - // Catch panic to prevent GPU init failure from crashing Gazebo. +#[cfg(target_os = "linux")] +const BACKEND_ATTEMPTS: &[(wgpu::Backends, &str)] = &[ + (wgpu::Backends::VULKAN, "vulkan"), + (wgpu::Backends::all(), "all"), +]; + +#[cfg(target_os = "windows")] +const BACKEND_ATTEMPTS: &[(wgpu::Backends, &str)] = &[ + (wgpu::Backends::DX12, "dx12"), + (wgpu::Backends::all(), "all"), +]; + +#[cfg(target_os = "macos")] +const BACKEND_ATTEMPTS: &[(wgpu::Backends, &str)] = &[ + (wgpu::Backends::METAL, "metal"), + (wgpu::Backends::all(), "all"), +]; + +#[cfg(not(any(target_os = "linux", target_os = "windows", target_os = "macos")))] +const BACKEND_ATTEMPTS: &[(wgpu::Backends, &str)] = &[(wgpu::Backends::all(), "all")]; + +fn try_init_gpu_context(backends: wgpu::Backends, label: &str) -> Option { + eprintln!("[sonar_wgpu] Trying backend set: {label}"); + + // Catch panic to prevent GPU init failure from crashing Gazebo.Whtat let result = std::panic::catch_unwind(|| { - // TODO: Replace wgpu::Backends::VULKAN with wgpu::Backends::all() to support - // Metal (macOS), DX12 (Windows), and other backends automatically. - // The explicit VULKAN flag means this will fail silently on non-Vulkan platforms. let instance = wgpu::Instance::new(wgpu::InstanceDescriptor { - backends: wgpu::Backends::VULKAN, + backends, ..Default::default() }); - // TODO: Same as above — enumerate_adapters(wgpu::Backends::all()) would pick up - // the best available backend on any platform instead of Vulkan-only. - let adapters: Vec<_> = instance - .enumerate_adapters(wgpu::Backends::VULKAN) - .into_iter() - .collect(); - - eprintln!("[sonar_wgpu] Vulkan adapters found: {}", adapters.len()); - for a in &adapters { - let info = a.get_info(); - eprintln!("[sonar_wgpu] {:?} -> {}", info.device_type, info.name); - } - - let adapter = adapters - .iter() - .find(|a| a.get_info().device_type == wgpu::DeviceType::DiscreteGpu) - .or_else(|| adapters.first())?; + let adapter = pollster::block_on(instance.request_adapter(&wgpu::RequestAdapterOptions { + power_preference: wgpu::PowerPreference::HighPerformance, + compatible_surface: None, + force_fallback_adapter: false, + }))?; - eprintln!("[sonar_wgpu] Selected adapter: {}", adapter.get_info().name); + eprintln!("[sonar_wgpu] [{label}] selected adapter: {}", adapter.get_info().name); let (device, queue) = pollster::block_on(adapter.request_device( &wgpu::DeviceDescriptor { label: Some("sonar_wgpu_device"), @@ -336,7 +328,7 @@ fn init_gpu_context() -> Option { None, )) .map_err(|e| { - eprintln!("[sonar_wgpu] request_device failed: {e}"); + eprintln!("[sonar_wgpu] [{label}] request_device failed: {e}"); e }) .ok()?; @@ -360,20 +352,47 @@ fn init_gpu_context() -> Option { &device, "fft_pipeline", include_str!("shaders/fft.wgsl"), &fft_bgl, ); - eprintln!("[sonar_wgpu] GPU pipelines compiled in {:.0} ms -> ready.", t0.elapsed().as_millis()); + eprintln!( + "[sonar_wgpu] [{label}] GPU pipelines compiled in {:.0} ms -> ready.", + t0.elapsed().as_millis() + ); - Some(GpuContext { device, queue, bs_pipeline, bs_bgl, convert_pipeline, convert_bgl, mm_pipeline, mm_bgl, fft_pipeline, fft_bgl, buffers: std::sync::Mutex::new(None) }) + Some(GpuContext { + device, + queue, + bs_pipeline, + bs_bgl, + convert_pipeline, + convert_bgl, + mm_pipeline, + mm_bgl, + fft_pipeline, + fft_bgl, + buffers: std::sync::Mutex::new(None), + }) }); match result { Ok(ctx) => ctx, Err(_) => { - eprintln!("[sonar_wgpu] GPU init panicked; falling back to CPU path"); + eprintln!("[sonar_wgpu] [{label}] GPU init panicked; trying next backend set"); None } } } +/// GPU context initialization: adapter enumeration, device request. +fn init_gpu_context() -> Option { + for (backends, label) in BACKEND_ATTEMPTS { + if let Some(ctx) = try_init_gpu_context(*backends, label) { + return Some(ctx); + } + } + + eprintln!("[sonar_wgpu] GPU init failed for all backend sets; falling back to CPU path"); + None +} + /// Global singleton: lazily initializes GPU context on first access. /// Returns None if GPU unavailable; falls back to CPU implementation. pub fn get_or_init() -> Option<&'static GpuContext> { diff --git a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/matmul.wgsl b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/matmul.wgsl index 5df59bec..b1e27318 100644 --- a/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/matmul.wgsl +++ b/gazebo/dave_gz_multibeam_sonar/wgpu_vendor/sonar_wgpu_rust/src/shaders/matmul.wgsl @@ -24,9 +24,7 @@ fn main( ) { let row = gid.y; let col = gid.x; - if (row >= params.n_beams || col >= params.n_freq) { - return; - } + let in_bounds = (row < params.n_beams) && (col < params.n_freq); // Accumulator: accumulates partial products across tiles. var acc = 0.0; @@ -40,14 +38,14 @@ fn main( // LOAD: load tile data into shared memory. // Load A tile: each thread loads one A[row, k_a]. - if (k_a < params.n_beams) { + if (in_bounds && k_a < params.n_beams) { tile_a[lid.y][lid.x] = a[row * params.n_beams + k_a]; } else { tile_a[lid.y][lid.x] = 0.0; } // Load B tile: each thread loads one B[k_b, col]. - if (k_b < params.n_beams) { + if (in_bounds && k_b < params.n_beams) { tile_b[lid.y][lid.x] = b[k_b * params.n_freq + col]; } else { tile_b[lid.y][lid.x] = 0.0; @@ -67,5 +65,7 @@ fn main( // WRITE: final result to global memory. // Normalize output: divide by beam_corrector_sum. let norm = max(params.beam_corrector_sum, 1e-12); - c[row * params.n_freq + col] = acc / norm; + if (in_bounds) { + c[row * params.n_freq + col] = acc / norm; + } } From 4f9ad661b0417724da8926b7c8d79b5b776ad038 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 18 Mar 2026 19:16:13 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../multibeam_sonar/sonar_compute_cpu.cc | 19 +++++---- .../multibeam_sonar/sonar_compute_cuda.cc | 39 +++++-------------- .../multibeam_sonar/sonar_compute_wgpu.cc | 2 +- 3 files changed, 23 insertions(+), 37 deletions(-) diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cpu.cc b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cpu.cc index 5290883c..92acdf0a 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cpu.cc +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cpu.cc @@ -14,8 +14,8 @@ // TODO: Move CreateComputeBackend() into its own file (e.g. sonar_compute_factory.cc) // so that the CPU and WGPU backends are fully decoupled and the factory is the // only translation unit that needs to know about both. -#include "sonar_compute_wgpu.hh" #include "sonar_compute_cuda.hh" +#include "sonar_compute_wgpu.hh" namespace gz { @@ -148,8 +148,9 @@ class CpuComputeBackend : public ComputeBackend std::unique_ptr CreateComputeBackend(const std::string & requestedBackend) { - std::cerr << "[sonar_compute_factory] CreateComputeBackend called with: " << requestedBackend << std::endl; - + std::cerr << "[sonar_compute_factory] CreateComputeBackend called with: " << requestedBackend + << std::endl; + std::string backend = requestedBackend; std::transform( backend.begin(), backend.end(), backend.begin(), @@ -157,7 +158,8 @@ std::unique_ptr CreateComputeBackend(const std::string & request if (backend == "wgpu") { - std::cerr << "[sonar_compute_factory] Attempting WgpuComputeBackend initialization..." << std::endl; + std::cerr << "[sonar_compute_factory] Attempting WgpuComputeBackend initialization..." + << std::endl; auto wgpuBackend = std::make_unique(); SonarComputeInput probe; if (wgpuBackend->Initialize(probe)) @@ -172,7 +174,8 @@ std::unique_ptr CreateComputeBackend(const std::string & request if (backend == "cuda") { - std::cerr << "[sonar_compute_factory] Attempting CudaComputeBackend initialization..." << std::endl; + std::cerr << "[sonar_compute_factory] Attempting CudaComputeBackend initialization..." + << std::endl; auto cudaBackend = std::make_unique(); SonarComputeInput probe; if (cudaBackend->Initialize(probe)) @@ -180,7 +183,8 @@ std::unique_ptr CreateComputeBackend(const std::string & request std::cerr << "[sonar_compute_factory] SUCCESS: CudaComputeBackend initialized!" << std::endl; return cudaBackend; } - std::cerr << "[sonar_compute_factory] CUDA backend requested but failed to initialize." << std::endl; + std::cerr << "[sonar_compute_factory] CUDA backend requested but failed to initialize." + << std::endl; return nullptr; } @@ -208,7 +212,8 @@ std::unique_ptr CreateComputeBackend(const std::string & request << std::endl; return wgpuBackend; } - std::cerr << "[sonar_compute_factory] SUCCESS: Auto-selected CPU backend (both CUDA and WGPU unavailable)" + std::cerr << "[sonar_compute_factory] SUCCESS: Auto-selected CPU backend (both CUDA and WGPU " + "unavailable)" << std::endl; } diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cuda.cc b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cuda.cc index 8aea552e..6baa0836 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cuda.cc +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_cuda.cc @@ -50,8 +50,9 @@ bool CudaComputeBackend::Compute(const SonarComputeInput & input, SonarComputeOu return false; } - if (!input.depthImage || !input.normalImage || !input.reflectivityImage || - !input.window || !input.beamCorrector || input.elevation_angles.empty()) + if ( + !input.depthImage || !input.normalImage || !input.reflectivityImage || !input.window || + !input.beamCorrector || input.elevation_angles.empty()) { return false; } @@ -62,33 +63,13 @@ bool CudaComputeBackend::Compute(const SonarComputeInput & input, SonarComputeOu const double vPixelSize = input.vFOV / static_cast(input.nRays - 1); const NpsGazeboSonar::CArray2D beams = NpsGazeboSonar::sonar_calculation_wrapper( - *input.depthImage, - *input.normalImage, - hPixelSize, - vPixelSize, - input.hFOV, - input.vFOV, - hPixelSize, - input.vFOV / 180.0 * M_PI, - hPixelSize, - const_cast(input.elevation_angles.data()), - vPixelSize * (input.raySkips + 1), - input.soundSpeed, - input.maxDistance, - input.sourceLevel, - input.nBeams, - input.nRays, - input.raySkips, - input.sonarFreq, - input.bandwidth, - input.nFreq, - *input.reflectivityImage, - input.attenuation, - const_cast(input.window), - input.beamCorrector, - input.beamCorrectorSum, - false, - input.blazingFlag); + *input.depthImage, *input.normalImage, hPixelSize, vPixelSize, input.hFOV, input.vFOV, + hPixelSize, input.vFOV / 180.0 * M_PI, hPixelSize, + const_cast(input.elevation_angles.data()), vPixelSize * (input.raySkips + 1), + input.soundSpeed, input.maxDistance, input.sourceLevel, input.nBeams, input.nRays, + input.raySkips, input.sonarFreq, input.bandwidth, input.nFreq, *input.reflectivityImage, + input.attenuation, const_cast(input.window), input.beamCorrector, + input.beamCorrectorSum, false, input.blazingFlag); output.nBeams = input.nBeams; output.nFreq = input.nFreq; diff --git a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_wgpu.cc b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_wgpu.cc index 59cd9ed8..c98afe42 100644 --- a/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_wgpu.cc +++ b/gazebo/dave_gz_multibeam_sonar/multibeam_sonar/sonar_compute_wgpu.cc @@ -68,7 +68,7 @@ bool WgpuComputeBackend::Compute(const SonarComputeInput & input, SonarComputeOu { if (!this->cpuFallback) { - std::cerr << "[sonar_wgpu] GPU unavailable on first compute -> creating CPU fallback backend" + std::cerr << "[sonar_wgpu] GPU unavailable on first compute -> creating CPU fallback backend" << std::endl; this->cpuFallback = CreateComputeBackend("cpu"); if (this->cpuFallback)