diff --git a/coretrace/CMakeLists.txt b/coretrace/CMakeLists.txt index bc237a00..85e6c02c 100644 --- a/coretrace/CMakeLists.txt +++ b/coretrace/CMakeLists.txt @@ -58,7 +58,6 @@ set(CORETRACE_SRC treemesh.cpp types.cpp vshot.cpp - simdata_bridge.cpp ) diff --git a/coretrace/simdriver/main.cpp b/coretrace/simdriver/main.cpp index 2cb9fff3..e99d4e32 100644 --- a/coretrace/simdriver/main.cpp +++ b/coretrace/simdriver/main.cpp @@ -2,21 +2,26 @@ * @file main.cpp * @brief Command-line driver for SolTrace ray tracing. * - * Reads a JSON file to configure SimulationData, runs the ray tracer, + * Reads a JSON or .stinput file to configure SimulationData, runs the ray tracer, * and writes the ray interaction records to a CSV file. * * Usage: - * simdriver [options] + * simdriver [] [options] * * Options: * --threads Number of parallel threads (default: 1) * --rays Override the number of rays from the JSON file + * --no-output Skip result retrieval and CSV output (output file not + * required when this flag is set) + * --no-csv Retrieve results but skip writing the CSV file (output + * file argument not required when this flag is set) * --embree Use the Embree runner (only available if built with * SOLTRACE_BUILD_EMBREE_SUPPORT=ON; falls back to native * runner with a warning if Embree support is absent) * --optix Use the OptiX runner (only available if built with * SOLTRACE_BUILD_OPTIX_SUPPORT=ON; falls back to native * runner with a warning if OptiX support is absent) + * --verbose Enable verbose logging in the OptiX runner */ #include @@ -47,11 +52,15 @@ using SolTrace::Runner::RunnerStatus; static void print_usage(const char *prog) { std::cerr - << "Usage: " << prog << " [options]\n" + << "Usage: " << prog << " [] [options]\n" << "\n" << "Options:\n" << " --threads Number of threads (default: 1)\n" << " --rays Override number of rays specified in the JSON file\n" + << " --no-output Skip result retrieval and CSV output\n" + << " (output file argument not required with this flag)\n" + << " --no-csv Retrieve results but skip writing the CSV file\n" + << " (output file argument not required with this flag)\n" #ifdef SOLTRACE_EMBREE_SUPPORT << " --embree Use Embree runner instead of the native runner\n" << " (requires SOLTRACE_BUILD_EMBREE_SUPPORT=ON at build time)\n" @@ -60,26 +69,54 @@ static void print_usage(const char *prog) << " --optix Use OptiX runner instead of the native runner\n" << " (requires SOLTRACE_BUILD_OPTIX_SUPPORT=ON at build time)\n" #endif + << " --verbose Enable verbose logging in the OptiX runner\n" ; } int main(int argc, char *argv[]) { - if (argc < 3) + if (argc < 2) { print_usage(argv[0]); return EXIT_FAILURE; } + // Pre-scan for --no-output and --no-csv so we know whether output_file is required + bool skip_output = false; + bool skip_csv = false; + for (int i = 2; i < argc; ++i) + { + const std::string a = argv[i]; + if (a == "--no-output") skip_output = true; + else if (a == "--no-csv") skip_csv = true; + } + + const bool file_optional = skip_output || skip_csv; + const std::string input_file = argv[1]; - const std::string output_file = argv[2]; + + // argv[2], if present and not a flag (does not start with --), is treated as + // the output file path. This allows the user to supply an output path even + // when --no-output or --no-csv is also present without it being mis-parsed + // as an unknown option. + const bool has_output_arg = (argc >= 3) && (std::string(argv[2]).rfind("--", 0) != 0); + const std::string output_file = has_output_arg ? argv[2] : ""; + const int opts_start = has_output_arg ? 3 : 2; + + if (!file_optional && !has_output_arg) + { + std::cerr << "Error: output file is required unless --no-output or --no-csv is specified\n"; + print_usage(argv[0]); + return EXIT_FAILURE; + } int num_threads = 1; long long num_rays_override = -1; // -1 means use what the JSON specifies bool use_embree = false; bool use_optix = false; + bool verbose = false; - for (int i = 3; i < argc; ++i) + for (int i = opts_start; i < argc; ++i) { const std::string arg = argv[i]; if (arg == "--threads") @@ -126,6 +163,10 @@ int main(int argc, char *argv[]) return EXIT_FAILURE; } } + else if (arg == "--no-output" || arg == "--no-csv") + { + // already handled in pre-scan; skip here + } else if (arg == "--embree") { use_embree = true; @@ -134,6 +175,10 @@ int main(int argc, char *argv[]) { use_optix = true; } + else if (arg == "--verbose") + { + verbose = true; + } else { std::cerr << "Error: unknown option '" << arg << "'\n"; @@ -143,23 +188,55 @@ int main(int argc, char *argv[]) } // ------------------------------------------------------------------------- - // Load simulation data from JSON + // Load simulation data from JSON or .stinput file // ------------------------------------------------------------------------- SimulationData simData; - try { + // Determine format by extension + auto ends_with = [](const std::string &s, const std::string &suffix) + { + return s.size() >= suffix.size() && + s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0; + }; + const bool is_stinput = ends_with(input_file, ".stinput"); + const bool is_json = ends_with(input_file, ".json"); + + if (!is_stinput && !is_json) + { + std::cerr << "Error: unrecognised input file extension (expected .json or .stinput): " + << input_file << "\n"; + return EXIT_FAILURE; + } + std::cout << "Loading simulation data from: " << input_file << "...\n"; auto t_load_start = std::chrono::steady_clock::now(); - simData.import_json_file(input_file); + + if (is_json) + { + try + { + simData.import_json_file(input_file); + } + catch (const std::exception &e) + { + std::cerr << "Error loading JSON file: " << e.what() << "\n"; + return EXIT_FAILURE; + } + } + else // .stinput + { + if (!simData.import_from_file(input_file)) + { + std::cerr << "Error loading .stinput file: " << input_file << "\n"; + return EXIT_FAILURE; + } + } + auto t_load_end = std::chrono::steady_clock::now(); std::cout << " Loaded in " << std::chrono::duration(t_load_end - t_load_start).count() - << " s\n"; - } - catch (const std::exception &e) - { - std::cerr << "Error loading JSON file: " << e.what() << "\n"; - return EXIT_FAILURE; + << " s\n" + << " Elements loaded: " << simData.get_number_of_elements() << "\n"; } // Override ray count if the user requested it @@ -217,24 +294,33 @@ int main(int argc, char *argv[]) std::cout << " Completed in " << std::chrono::duration(t_run_end - t_run_start).count() << " s\n"; + std::cout << " Rays launched: " << runner.get_number_rays_launched() << "\n"; + std::cout << " Rays traced: " << runner.get_number_rays_traced() << "\n"; - std::cout << "Retrieving results...\n"; - auto t_report_start = std::chrono::steady_clock::now(); - sts = runner.report_simulation(&result, 0); - auto t_report_end = std::chrono::steady_clock::now(); - if (sts != RunnerStatus::SUCCESS) + if (!skip_output) { - std::cerr << "Error: failed to collect simulation results\n"; - return EXIT_FAILURE; + std::cout << "Retrieving results...\n"; + auto t_report_start = std::chrono::steady_clock::now(); + sts = runner.report_simulation(&result, 0); + auto t_report_end = std::chrono::steady_clock::now(); + if (sts != RunnerStatus::SUCCESS) + { + std::cerr << "Error: failed to collect simulation results\n"; + return EXIT_FAILURE; + } + std::cout << " Retrieved in " + << std::chrono::duration(t_report_end - t_report_start).count() + << " s\n"; + } + else + { + std::cout << "Skipping result retrieval (--no-output).\n"; } - std::cout << " Retrieved in " - << std::chrono::duration(t_report_end - t_report_start).count() - << " s\n"; } else #endif #ifdef SOLTRACE_OPTIX_SUPPORT - if (use_optix) + if (use_optix) { OptixRunner runner; @@ -245,6 +331,8 @@ int main(int argc, char *argv[]) return EXIT_FAILURE; } + runner.set_verbose(verbose); + std::cout << "Using OptiX runner\n"; std::cout << "Setting up simulation...\n"; @@ -272,19 +360,33 @@ int main(int argc, char *argv[]) std::cout << " Completed in " << std::chrono::duration(t_run_end - t_run_start).count() << " s\n"; + std::cout << " Rays launched: " << runner.get_number_rays_launched() << "\n"; + std::cout << " Rays traced: " << runner.get_number_rays_traced() << "\n"; - std::cout << "Retrieving results...\n"; - auto t_report_start = std::chrono::steady_clock::now(); - sts = runner.report_simulation(&result, 0); - auto t_report_end = std::chrono::steady_clock::now(); - if (sts != RunnerStatus::SUCCESS) + if (!skip_output) { - std::cerr << "Error: failed to collect simulation results\n"; - return EXIT_FAILURE; + std::cout << "Retrieving results...\n"; + auto t_report_start = std::chrono::steady_clock::now(); + sts = runner.report_simulation(&result, 0); + auto t_report_end = std::chrono::steady_clock::now(); + if (sts != RunnerStatus::SUCCESS) + { + std::cerr << "Error: failed to collect simulation results\n"; + return EXIT_FAILURE; + } + std::cout << " Retrieved in " + << std::chrono::duration(t_report_end - t_report_start).count() + << " s\n"; + } + else + { + std::cout << "Skipping result retrieval (--no-output).\n"; + } + + if (!verbose) + { + runner.print_timing(); } - std::cout << " Retrieved in " - << std::chrono::duration(t_report_end - t_report_start).count() - << " s\n"; } else #endif @@ -341,39 +443,59 @@ int main(int argc, char *argv[]) std::cout << " Completed in " << std::chrono::duration(t_run_end - t_run_start).count() << " s\n"; + std::cout << " Rays launched: " << runner.get_number_rays_launched() << "\n"; + std::cout << " Rays traced: " << runner.get_number_rays_traced() << "\n"; - std::cout << "Retrieving results...\n"; - auto t_report_start = std::chrono::steady_clock::now(); - sts = runner.report_simulation(&result, 0); - auto t_report_end = std::chrono::steady_clock::now(); - if (sts != RunnerStatus::SUCCESS) + if (!skip_output) { - std::cerr << "Error: failed to collect simulation results\n"; - return EXIT_FAILURE; + std::cout << "Retrieving results...\n"; + auto t_report_start = std::chrono::steady_clock::now(); + sts = runner.report_simulation(&result, 0); + auto t_report_end = std::chrono::steady_clock::now(); + if (sts != RunnerStatus::SUCCESS) + { + std::cerr << "Error: failed to collect simulation results\n"; + return EXIT_FAILURE; + } + std::cout << " Retrieved in " + << std::chrono::duration(t_report_end - t_report_start).count() + << " s\n"; + } + else + { + std::cout << "Skipping result retrieval (--no-output).\n"; } - std::cout << " Retrieved in " - << std::chrono::duration(t_report_end - t_report_start).count() - << " s\n"; } // ------------------------------------------------------------------------- // Write results to CSV // ------------------------------------------------------------------------- - std::cout << "Writing " << result.get_number_of_records() - << " ray records to: " << output_file << "...\n"; - try + if (!skip_output && !skip_csv) { - auto t_write_start = std::chrono::steady_clock::now(); - result.write_csv_file(output_file); - auto t_write_end = std::chrono::steady_clock::now(); - std::cout << " Written in " - << std::chrono::duration(t_write_end - t_write_start).count() - << " s\n"; + std::cout << "Writing " << result.get_number_of_records() + << " ray records to: " << output_file << "...\n"; + try + { + auto t_write_start = std::chrono::steady_clock::now(); + result.write_csv_file(output_file); + auto t_write_end = std::chrono::steady_clock::now(); + std::cout << " Written in " + << std::chrono::duration(t_write_end - t_write_start).count() + << " s\n"; + } + catch (const std::exception &e) + { + std::cerr << "Error writing CSV file: " << e.what() << "\n"; + return EXIT_FAILURE; + } } - catch (const std::exception &e) + else if (skip_csv) { - std::cerr << "Error writing CSV file: " << e.what() << "\n"; - return EXIT_FAILURE; + std::cout << "Skipping CSV output (--no-csv).\n"; + } + else + { + std::cout << "Skipping CSV output (--no-output).\n"; } std::cout << "Done.\n"; diff --git a/coretrace/simulation_data/simulation_parameters.hpp b/coretrace/simulation_data/simulation_parameters.hpp index be15daa9..6fb66717 100644 --- a/coretrace/simulation_data/simulation_parameters.hpp +++ b/coretrace/simulation_data/simulation_parameters.hpp @@ -21,9 +21,6 @@ struct SimulationParameters // TODO: Figure out how to store time... DateTime sim_dt; - bool include_sun_shape_errors; - bool include_optical_errors; - std::uint_fast64_t number_of_rays; std::uint_fast64_t max_number_of_rays; double tolerance; @@ -33,6 +30,9 @@ struct SimulationParameters int seed; + bool include_sun_shape_errors; + bool include_optical_errors; + SimulationParameters() : number_of_rays(10000), max_number_of_rays(1000000), tolerance(0.0), diff --git a/coretrace/simulation_runner/embree_runner/embree_runner.cpp b/coretrace/simulation_runner/embree_runner/embree_runner.cpp index 52484840..5f66d9e7 100644 --- a/coretrace/simulation_runner/embree_runner/embree_runner.cpp +++ b/coretrace/simulation_runner/embree_runner/embree_runner.cpp @@ -50,8 +50,7 @@ namespace SolTrace::EmbreeRunner { // TODO: Do a more efficient implementation of this? this->clean_embree(); - NativeRunner::update_simulation(data); - return RunnerStatus::SUCCESS; + return NativeRunner::update_simulation(data); } RunnerStatus EmbreeRunner::run_simulation() diff --git a/coretrace/simulation_runner/embree_runner/ftz_daz.hpp b/coretrace/simulation_runner/embree_runner/ftz_daz.hpp new file mode 100644 index 00000000..742d4b17 --- /dev/null +++ b/coretrace/simulation_runner/embree_runner/ftz_daz.hpp @@ -0,0 +1,39 @@ +#ifndef SOLTRACE_FTZ_DAZ_HPP +#define SOLTRACE_FTZ_DAZ_HPP + +#include + +// Set Flush-to-Zero (FTZ) and Denormals-are-Zero (DAZ) floating-point flags +// for the calling thread. These are thread-local CPU register settings that +// avoid slow denormal handling in the FPU, as recommended by the Embree docs. +// On ARM, DAZ is implicit when FZ is set (no separate bit). + +#if defined(__SSE__) || defined(_M_X64) || defined(_M_IX86) +# include +// Set FTZ (bit 15, 0x8000) and DAZ (bit 6, 0x0040) via MXCSR. +// is sufficient; (SSE3) is intentionally avoided +// so this compiles on SSE-only targets (-msse without -msse3). +# define SOLTRACE_SET_FTZ_DAZ() \ + _mm_setcsr(_mm_getcsr() | 0x8040u) +#elif defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) +# if defined(_MSC_VER) +# include +# define SOLTRACE_SET_FTZ_DAZ() do { \ + uint64_t _fpcr = _ReadStatusReg(ARM64_FPCR); \ + _fpcr |= (1ULL << 24); \ + _WriteStatusReg(ARM64_FPCR, _fpcr); \ + } while(0) +# else + /* GCC / Clang on ARM64 */ +# define SOLTRACE_SET_FTZ_DAZ() do { \ + uint64_t _fpcr; \ + __asm__ __volatile__("mrs %0, fpcr" : "=r"(_fpcr)); \ + _fpcr |= (1ULL << 24); \ + __asm__ __volatile__("msr fpcr, %0" : : "r"(_fpcr)); \ + } while(0) +# endif +#else +# define SOLTRACE_SET_FTZ_DAZ() /* unsupported architecture, no-op */ +#endif + +#endif // SOLTRACE_FTZ_DAZ_HPP diff --git a/coretrace/simulation_runner/embree_runner/trace_embree.cpp b/coretrace/simulation_runner/embree_runner/trace_embree.cpp index 0e36f03a..d3720dfe 100644 --- a/coretrace/simulation_runner/embree_runner/trace_embree.cpp +++ b/coretrace/simulation_runner/embree_runner/trace_embree.cpp @@ -1,7 +1,9 @@ #include "trace_embree.hpp" #include +#include #include +#include #include #include #include @@ -27,6 +29,7 @@ #include "embree_helper.hpp" #include "find_element_hit_embree.hpp" +#include "ftz_daz.hpp" namespace SolTrace::EmbreeRunner { @@ -56,11 +59,16 @@ namespace SolTrace::EmbreeRunner // RTCScene embree_scene = nullptr; // bool use_shared_embree = false; - // Make device - // std::cout << "Making embree device..." << std::endl; - std::stringstream ss; - ss << "threads=" << nthreads; - embree_device = rtcNewDevice(ss.str().c_str()); + // // Make device + // // std::cout << "Making embree device..." << std::endl; + // std::stringstream ss; + // ss << "threads=" << nthreads; + // embree_device = rtcNewDevice(ss.str().c_str()); + + // TODO: Need to test this on largest scenes we expect to + // trace. Adding more threads does not significantly help + // when there are only ~6000 elements. + embree_device = rtcNewDevice("threads=1"); // std::cout << "Setting error function..." << std::endl; rtcSetDeviceErrorFunction(embree_device, error_function, NULL); @@ -98,9 +106,16 @@ namespace SolTrace::EmbreeRunner const RTCScene &embree_scene) { + // using Clock = std::chrono::steady_clock; + // using Seconds = std::chrono::duration; + + // auto t_start = Clock::now(); + System->RayData.SetUp(nthreads, NumberOfRays); System->SunRayCount = 0; + // auto t_after_setup = Clock::now(); + // Initialize Sun glm::dvec3 PosSunStage; bool status = SolTrace::NativeRunner::SunToPrimaryStage(logger, @@ -112,6 +127,8 @@ namespace SolTrace::EmbreeRunner if (!status) return RunnerStatus::ERROR; + // auto t_after_sun_init = Clock::now(); + uint_fast64_t rem = NumberOfRays % nthreads; uint_fast64_t nrays_per_thread = NumberOfRays / nthreads; uint_fast64_t nrays; @@ -138,7 +155,27 @@ namespace SolTrace::EmbreeRunner manager->manage(k, std::move(my_future)); } - return manager->monitor_until_completion(); + // auto t_after_launch = Clock::now(); + + RunnerStatus result = manager->monitor_until_completion(); + + // auto t_done = Clock::now(); + + // double s_setup = Seconds(t_after_setup - t_start).count(); + // double s_sun_init = Seconds(t_after_sun_init - t_after_setup).count(); + // double s_launch = Seconds(t_after_launch - t_after_sun_init).count(); + // double s_ray_trace = Seconds(t_done - t_after_launch).count(); + // double s_total = Seconds(t_done - t_start).count(); + + // std::cout << "[trace_embree] timing (nthreads=" << nthreads + // << ", rays=" << NumberOfRays << ")\n" + // << " ray_data_setup : " << s_setup << " s\n" + // << " sun_init : " << s_sun_init << " s\n" + // << " thread_launch : " << s_launch << " s\n" + // << " ray_trace : " << s_ray_trace << " s\n" + // << " total : " << s_total << " s\n"; + + return result; } RunnerStatus trace_embree_single_thread( @@ -157,6 +194,10 @@ namespace SolTrace::EmbreeRunner { // std::cout << "Thread " << thread_id << " with seed " << seed // << std::endl; + // Set flush-to-zero and denormals-are-zero for this thread to avoid + // slow denormal handling in the FPU (recommended by Embree docs). + SOLTRACE_SET_FTZ_DAZ(); + // Initialize Internal State Variables MTRand myrng(seed); @@ -180,6 +221,72 @@ namespace SolTrace::EmbreeRunner uint_fast64_t n_rays_active = NumberOfRays; uint_fast64_t sun_ray_count_local = 0; + // // Timing accumulators + // using Clock = std::chrono::steady_clock; + // using ns_t = long long; + // ns_t t_generate_ray = 0; + // ns_t t_transform_to_local = 0; + // ns_t t_find_element_hit = 0; + // ns_t t_determine_interaction = 0; + // ns_t t_process_interaction = 0; + // ns_t t_transform_to_reference = 0; + // ns_t t_ray_data_append = 0; + // ns_t t_progress_update = 0; + // uint_fast64_t n_find_element_hit = 0; + // uint_fast64_t n_determine_interaction = 0; + // uint_fast64_t n_process_interaction = 0; + // uint_fast64_t n_ray_data_append = 0; + + // auto write_timing = [&]() { + // std::string fname = "trace_embree_timing_thread_" + + // std::to_string(thread_id) + ".csv"; + // std::ofstream f(fname); + // constexpr double ns_to_s = 1.0e-9; + // ns_t t_total = t_generate_ray + t_transform_to_local + t_find_element_hit + + // t_determine_interaction + t_process_interaction + + // t_transform_to_reference + t_ray_data_append + t_progress_update; + // auto pct = [&](ns_t t) -> double { + // return t_total > 0 ? 100.0 * static_cast(t) / static_cast(t_total) : 0.0; + // }; + // f << std::fixed; + // f << "section,calls,seconds,pct_total\n"; + // f << "generate_ray,," << t_generate_ray * ns_to_s << "," << pct(t_generate_ray) << "\n"; + // f << "transform_to_local,," << t_transform_to_local * ns_to_s << "," << pct(t_transform_to_local) << "\n"; + // f << "find_element_hit," << n_find_element_hit << "," << t_find_element_hit * ns_to_s << "," << pct(t_find_element_hit) << "\n"; + // f << "determine_interaction," << n_determine_interaction << "," << t_determine_interaction * ns_to_s << "," << pct(t_determine_interaction) << "\n"; + // f << "process_interaction," << n_process_interaction << "," << t_process_interaction * ns_to_s << "," << pct(t_process_interaction) << "\n"; + // f << "transform_to_reference,,"<< t_transform_to_reference * ns_to_s << "," << pct(t_transform_to_reference) << "\n"; + // f << "ray_data_append," << n_ray_data_append << "," << t_ray_data_append * ns_to_s << "," << pct(t_ray_data_append) << "\n"; + // f << "progress_update,," << t_progress_update * ns_to_s << "," << pct(t_progress_update) << "\n"; + // f << "total,," << t_total * ns_to_s << ",100.0\n"; + // }; + + // auto write_timing = [&]() { + // // std::string fname = "trace_embree_timing_thread_" + + // // std::to_string(thread_id) + ".csv"; + // // std::ofstream f(fname); + // std::stringstream f; + // constexpr double ns_to_s = 1.0e-9; + // ns_t t_total = t_generate_ray + t_transform_to_local + t_find_element_hit + + // t_determine_interaction + t_process_interaction + + // t_transform_to_reference + t_ray_data_append + t_progress_update; + // auto pct = [&](ns_t t) -> double { + // return t_total > 0 ? 100.0 * static_cast(t) / static_cast(t_total) : 0.0; + // }; + // f << "thread_id " << thread_id << "\n" << std::fixed; + // f << "section,calls,seconds,pct_total\n"; + // f << "generate_ray,," << t_generate_ray * ns_to_s << "," << pct(t_generate_ray) << "\n"; + // f << "transform_to_local,," << t_transform_to_local * ns_to_s << "," << pct(t_transform_to_local) << "\n"; + // f << "find_element_hit," << n_find_element_hit << "," << t_find_element_hit * ns_to_s << "," << pct(t_find_element_hit) << "\n"; + // f << "determine_interaction," << n_determine_interaction << "," << t_determine_interaction * ns_to_s << "," << pct(t_determine_interaction) << "\n"; + // f << "process_interaction," << n_process_interaction << "," << t_process_interaction * ns_to_s << "," << pct(t_process_interaction) << "\n"; + // f << "transform_to_reference,,"<< t_transform_to_reference * ns_to_s << "," << pct(t_transform_to_reference) << "\n"; + // f << "ray_data_append," << n_ray_data_append << "," << t_ray_data_append * ns_to_s << "," << pct(t_ray_data_append) << "\n"; + // f << "progress_update,," << t_progress_update * ns_to_s << "," << pct(t_progress_update) << "\n"; + // f << "total,," << t_total * ns_to_s << ",100.0\n"; + // std::cout << f.str() << std::endl; + // }; + // Loop through stages for (uint_fast64_t i = 0; i < System->StageList.size(); i++) { @@ -242,9 +349,14 @@ namespace SolTrace::EmbreeRunner } // transform the global incoming ray to local stage coordinates - TransformToLocal(PosRayGlob, CosRayGlob, - Stage->Origin, Stage->RRefToLoc, - PosRayStage, CosRayStage); + { + // auto _t0 = Clock::now(); + TransformToLocal(PosRayGlob, CosRayGlob, + Stage->Origin, Stage->RRefToLoc, + PosRayStage, CosRayStage); + // t_transform_to_local += std::chrono::duration_cast( + // Clock::now() - _t0).count(); + } // Initialize internal variables for ray intersection tracing bool RayInStage = true; @@ -270,12 +382,18 @@ namespace SolTrace::EmbreeRunner while (RayInStage) { - FindElementHit_embree(embree_scene, i, RayNumber, - PosRayGlob, CosRayGlob, - LastPosRaySurfElement, LastCosRaySurfElement, - LastDFXYZ, LastElementNumber, LastRayNumber, - LastPosRaySurfStage, LastCosRaySurfStage, - ErrorFlag, LastHitBackSide, StageHit); + { + // auto _t0 = Clock::now(); + FindElementHit_embree(embree_scene, i, RayNumber, + PosRayGlob, CosRayGlob, + LastPosRaySurfElement, LastCosRaySurfElement, + LastDFXYZ, LastElementNumber, LastRayNumber, + LastPosRaySurfStage, LastCosRaySurfStage, + ErrorFlag, LastHitBackSide, StageHit); + // t_find_element_hit += std::chrono::duration_cast( + // Clock::now() - _t0).count(); + // ++n_find_element_hit; + } // Breakout if ray left stage if (!StageHit) @@ -290,6 +408,7 @@ namespace SolTrace::EmbreeRunner if (i == 0 && MultipleHitCount == 1) { // Add ray to Stage RayData + // auto _t0_append = Clock::now(); auto r = System->RayData.Append(thread_id, PosRayGlob, CosRayGlob, @@ -297,6 +416,9 @@ namespace SolTrace::EmbreeRunner i + 1, LastRayNumber, RayEvent::CREATE); + // t_ray_data_append += std::chrono::duration_cast( + // Clock::now() - _t0_append).count(); + // ++n_ray_data_append; if (r == nullptr) { @@ -324,8 +446,10 @@ namespace SolTrace::EmbreeRunner else optics = &optelm->Optics.Front; - bool good = - SolTrace::NativeRunner::determine_interaction_type( + bool good; + { + // auto _t0 = Clock::now(); + good = SolTrace::NativeRunner::determine_interaction_type( logger, i, thread_id, @@ -334,9 +458,14 @@ namespace SolTrace::EmbreeRunner LastDFXYZ, LastCosRaySurfElement, rev); + // t_determine_interaction += std::chrono::duration_cast( + // Clock::now() - _t0).count(); + // ++n_determine_interaction; + } if (!good) { + // write_timing(); return RunnerStatus::ERROR; } @@ -349,43 +478,60 @@ namespace SolTrace::EmbreeRunner // Process Interaction int k = LastElementNumber - 1; - SolTrace::NativeRunner::ProcessInteraction( - System, - myrng, - IncludeSunShape, - optics, - IncludeErrors, - i, - Stage, - MultipleHitCount, - LastDFXYZ, - LastCosRaySurfElement, - ErrorFlag, - CosRayOutElement, - LastPosRaySurfElement, - PosRayOutElement); + { + // auto _t0 = Clock::now(); + SolTrace::NativeRunner::ProcessInteraction( + System, + myrng, + IncludeSunShape, + optics, + IncludeErrors, + i, + Stage, + MultipleHitCount, + LastDFXYZ, + LastCosRaySurfElement, + ErrorFlag, + CosRayOutElement, + LastPosRaySurfElement, + PosRayOutElement); + // t_process_interaction += std::chrono::duration_cast( + // Clock::now() - _t0).count(); + // ++n_process_interaction; + } // Transform ray back to stage coordinate system - TransformToReference(PosRayOutElement, - CosRayOutElement, - Stage->ElementList[k]->Origin, - Stage->ElementList[k]->RLocToRef, - PosRayStage, - CosRayStage); - TransformToReference(PosRayStage, - CosRayStage, - Stage->Origin, - Stage->RLocToRef, - PosRayGlob, - CosRayGlob); - - System->RayData.Append(thread_id, - PosRayGlob, - CosRayGlob, - LastElementNumber, - i + 1, - LastRayNumber, - rev); + { + // auto _t0 = Clock::now(); + TransformToReference(PosRayOutElement, + CosRayOutElement, + Stage->ElementList[k]->Origin, + Stage->ElementList[k]->RLocToRef, + PosRayStage, + CosRayStage); + TransformToReference(PosRayStage, + CosRayStage, + Stage->Origin, + Stage->RLocToRef, + PosRayGlob, + CosRayGlob); + // t_transform_to_reference += std::chrono::duration_cast( + // Clock::now() - _t0).count(); + } + + { + // auto _t0 = Clock::now(); + System->RayData.Append(thread_id, + PosRayGlob, + CosRayGlob, + LastElementNumber, + i + 1, + LastRayNumber, + rev); + // t_ray_data_append += std::chrono::duration_cast( + // Clock::now() - _t0).count(); + // ++n_ray_data_append; + } // Break out if multiple hits are not allowed if (!Stage->MultiHitsPerRay) @@ -404,29 +550,47 @@ namespace SolTrace::EmbreeRunner if (update_count % update_rate == 0) { + // auto _t0 = Clock::now(); double progress = update_count / total_work; manager->progress_update(thread_id, progress); - if (manager->terminate(thread_id)) + bool should_cancel = manager->terminate(thread_id); + // t_progress_update += std::chrono::duration_cast( + // Clock::now() - _t0).count(); + if (should_cancel) + { + // write_timing(); return RunnerStatus::CANCEL; + } } // Handle if Ray was absorbed if (RayIsAbsorbed) { - TransformToReference(LastPosRaySurfStage, - LastCosRaySurfStage, - Stage->Origin, - Stage->RLocToRef, - PosRayGlob, - CosRayGlob); - - System->RayData.Append(thread_id, - PosRayGlob, - CosRayGlob, - LastElementNumber, - i + 1, - LastRayNumber, - RayEvent::ABSORB); + { + // auto _t0 = Clock::now(); + TransformToReference(LastPosRaySurfStage, + LastCosRaySurfStage, + Stage->Origin, + Stage->RLocToRef, + PosRayGlob, + CosRayGlob); + // t_transform_to_reference += std::chrono::duration_cast( + // Clock::now() - _t0).count(); + } + + { + // auto _t0 = Clock::now(); + System->RayData.Append(thread_id, + PosRayGlob, + CosRayGlob, + LastElementNumber, + i + 1, + LastRayNumber, + RayEvent::ABSORB); + // t_ray_data_append += std::chrono::duration_cast( + // Clock::now() - _t0).count(); + // ++n_ray_data_append; + } n_rays_active--; @@ -530,13 +694,19 @@ namespace SolTrace::EmbreeRunner { LastRayNumber = RayNumber; - System->RayData.Append(thread_id, - PosRayGlob, - CosRayGlob, - ELEMENT_NULL, - i + 1, - LastRayNumber, - RayEvent::EXIT); + { + // auto _t0 = Clock::now(); + System->RayData.Append(thread_id, + PosRayGlob, + CosRayGlob, + ELEMENT_NULL, + i + 1, + LastRayNumber, + RayEvent::EXIT); + // t_ray_data_append += std::chrono::duration_cast( + // Clock::now() - _t0).count(); + // ++n_ray_data_append; + } n_rays_active--; @@ -586,6 +756,7 @@ namespace SolTrace::EmbreeRunner // size_t pp = IncomingRays[PreviousStageDataArrayIndex - 1].Num; // System->errlog("LastRayNumberInPreviousStage=0, stage %d, PrevIdx=%d, CurIdx=%d, pp=%d", i + 1, // PreviousStageDataArrayIndex, StageDataArrayIndex, pp); + // write_timing(); return RunnerStatus::ERROR; } } @@ -593,6 +764,7 @@ namespace SolTrace::EmbreeRunner { // System->errlog("Invalid PreviousStageDataArrayIndex: %u, @ stage %d", // PreviousStageDataArrayIndex, i + 1); + // write_timing(); return RunnerStatus::ERROR; } } @@ -603,13 +775,19 @@ namespace SolTrace::EmbreeRunner for (uint_fast64_t k = 0; k < n_rays_active; ++k) { GlobalRay_refactored ray = IncomingRays[k]; - System->RayData.Append(thread_id, - ray.Pos, - ray.Cos, - ELEMENT_NULL, - idx + 1, - ray.Num, - RayEvent::EXIT); + { + // auto _t0 = Clock::now(); + System->RayData.Append(thread_id, + ray.Pos, + ray.Cos, + ELEMENT_NULL, + idx + 1, + ray.Num, + RayEvent::EXIT); + // t_ray_data_append += std::chrono::duration_cast( + // Clock::now() - _t0).count(); + // ++n_ray_data_append; + } } // System->SunRayCount is atomic so this is thread safe diff --git a/coretrace/simulation_runner/native_runner/native_runner.cpp b/coretrace/simulation_runner/native_runner/native_runner.cpp index dba8e915..ca6341aa 100644 --- a/coretrace/simulation_runner/native_runner/native_runner.cpp +++ b/coretrace/simulation_runner/native_runner/native_runner.cpp @@ -285,8 +285,8 @@ namespace SolTrace::NativeRunner { // TODO: Do a more efficient implementation of this? this->tsys.ClearAll(); - this->setup_simulation(data); - return RunnerStatus::SUCCESS; + return this->setup_simulation(data); + // return RunnerStatus::SUCCESS; } RunnerStatus NativeRunner::run_simulation() @@ -327,7 +327,7 @@ namespace SolTrace::NativeRunner const TSystem *sys = this->get_system(); // const TRayData ray_data = sys->AllRayData; - const TRayData ray_data = sys->RayData; + const TRayData& ray_data = sys->RayData; std::map ray_records; std::map::iterator iter; uint_fast64_t ndata = ray_data.Count(); diff --git a/coretrace/simulation_runner/native_runner/native_runner.hpp b/coretrace/simulation_runner/native_runner/native_runner.hpp index 800b4d2e..4e8387f7 100644 --- a/coretrace/simulation_runner/native_runner/native_runner.hpp +++ b/coretrace/simulation_runner/native_runner/native_runner.hpp @@ -39,6 +39,18 @@ namespace SolTrace::NativeRunner virtual RunnerStatus report_simulation(SolTrace::Result::SimulationResult *result, int level_spec) override; + virtual uint_fast64_t get_number_rays_launched() const override + { + return tsys.SunRayCount; + } + virtual uint_fast64_t get_number_rays_traced() const override + { + // TODO: This could be wrong if we hit max number of rays before getting this many hits. + // At the moment max number of rays is ignored though... + // return tsys.sim_raycount; + return tsys.SunRayCount > 0 ? tsys.sim_raycount : 0; + } + // Runner options void disable_power_tower() { this->as_power_tower = false; } void enable_power_tower() { this->as_power_tower = true; } diff --git a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/Aperture.cpp b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/Aperture.cpp index f2f136af..e1559210 100644 --- a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/Aperture.cpp +++ b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/Aperture.cpp @@ -1,4 +1,5 @@ #include "Aperture.h" +#include "constants.hpp" using namespace OptixCSP; @@ -28,12 +29,24 @@ double ApertureCircle::get_radius() const { return radius; } -double ApertureCircle::get_width() const { - return 2.0 * radius; +// double ApertureCircle::get_width() const { +// return 2.0 * radius; +// } + +// double ApertureCircle::get_height() const { +// return 2.0 * radius; +// } + +// ApertureHexagon implementations +ApertureHexagon::ApertureHexagon() : side_length(1.0) {} +ApertureHexagon::ApertureHexagon(double s) : side_length(s) {} + +ApertureType ApertureHexagon::get_aperture_type() const { + return ApertureType::HEXAGON; } -double ApertureCircle::get_height() const { - return 2.0 * radius; +double ApertureHexagon::get_side_length() const { + return side_length; } // ApertureRectangleEasy implementations @@ -77,3 +90,13 @@ Vec3d ApertureQuadrilateral::get_p0() const { return m_p0; } Vec3d ApertureQuadrilateral::get_p1() const { return m_p1; } Vec3d ApertureQuadrilateral::get_p2() const { return m_p2; } Vec3d ApertureQuadrilateral::get_p3() const { return m_p3; } + +ApertureAnnulus::ApertureAnnulus() : ri(0.5), ro(1.0), arc(2.0 * SolTrace::Data::PI) {} +ApertureAnnulus::ApertureAnnulus(double r_inner, double r_outer, double arc) + : ri(r_inner), ro(r_outer), arc(arc) {} +ApertureType ApertureAnnulus::get_aperture_type() const { + return ApertureType::ANNULUS; +} +double ApertureAnnulus::get_radius_inner() const { return ri; } +double ApertureAnnulus::get_radius_outer() const { return ro; } +double ApertureAnnulus::get_arc() const { return arc; } diff --git a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/Aperture.h b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/Aperture.h index 4f6dd9ba..83228d5d 100644 --- a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/Aperture.h +++ b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/Aperture.h @@ -41,13 +41,27 @@ namespace OptixCSP { virtual ApertureType get_aperture_type() const override; void set_size(double r); virtual double get_radius() const override; - virtual double get_width() const override; - virtual double get_height() const override; + // virtual double get_width() const override; + // virtual double get_height() const override; private: double radius; }; + // Concrete class for a circular aperture. + class ApertureHexagon : public Aperture { + public: + ApertureHexagon(); + ApertureHexagon(double s); + virtual ~ApertureHexagon() = default; + + virtual ApertureType get_aperture_type() const override; + virtual double get_side_length() const; + + private: + double side_length; + }; + // Concrete class for an easy rectangular aperture. class ApertureRectangle : public Aperture { public: @@ -114,4 +128,19 @@ namespace OptixCSP { Vec3d m_p3; }; -} \ No newline at end of file + class ApertureAnnulus : public Aperture { + public: + ApertureAnnulus(); + ApertureAnnulus(double ri, double ro, double arc); + virtual ~ApertureAnnulus() = default; + virtual ApertureType get_aperture_type() const override; + double get_radius_inner() const; + double get_radius_outer() const; + double get_arc() const; + private: + double ri; + double ro; + double arc; // Measured in radians + }; + +} diff --git a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/CspElement.cpp b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/CspElement.cpp index cfbec7fe..7ade3585 100644 --- a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/CspElement.cpp +++ b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/CspElement.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include "vec3d.h" @@ -22,6 +21,7 @@ CspElementBase::CspElementBase() CspElement::CspElement() { m_origin = Vec3d(0.0, 0.0, 0.0); + m_aim_point = Vec3d(0.0, 0.0, 1.0); m_rotation_matrix = Matrix33d(); m_surface = nullptr; m_aperture = nullptr; @@ -42,6 +42,16 @@ void CspElement::set_origin(const Vec3d &o) m_origin = o; } +const Vec3d &CspElement::get_aim_point() const +{ + return m_aim_point; +} + +void CspElement::set_aim_point(const Vec3d &ap) +{ + m_aim_point = ap; +} + std::shared_ptr CspElement::get_aperture() const { return m_aperture; @@ -91,7 +101,7 @@ Matrix33d CspElement::get_rotation_matrix() const return m_rotation_matrix; } -void CspElement::set_rotation_matrix(const Matrix33d& rotation_matrix) +void CspElement::set_rotation_matrix(const Matrix33d &rotation_matrix) { m_rotation_matrix = rotation_matrix; } @@ -218,14 +228,14 @@ GeometryDataST CspElement::toDeviceGeometryData() const Vec3d edge_y = v2 * (float)height; Vec3d local_anchor(x_coord + width, y_coord, 0.0); - //float3 anchor = OptixCSP::toFloat3(m_origin - v1 * 0.5 - v2 * 0.5); + // float3 anchor = OptixCSP::toFloat3(m_origin - v1 * 0.5 - v2 * 0.5); Vec3d global_anchor = rotation_matrix * local_anchor + m_origin; - GeometryDataST::Rectangle_Parabolic heliostat(OptixCSP::toFloat3(edge_x), - OptixCSP::toFloat3(edge_y), - OptixCSP::toFloat3(global_anchor), - (float)m_surface->get_curvature_1(), - (float)m_surface->get_curvature_2()); + GeometryDataST::Rectangle_Parabolic heliostat(OptixCSP::toFloat3(edge_x), + OptixCSP::toFloat3(edge_y), + OptixCSP::toFloat3(global_anchor), + (float)m_surface->get_curvature_1(), + (float)m_surface->get_curvature_2()); geometry_data.setRectangleParabolic(heliostat); } @@ -236,13 +246,10 @@ GeometryDataST CspElement::toDeviceGeometryData() const float3 center = OptixCSP::toFloat3(m_origin); Matrix33d rotation_matrix = get_rotation_matrix(); // L2G rotation matrix - float3 base_x = OptixCSP::toFloat3(rotation_matrix.get_x_basis()); - float3 base_z = OptixCSP::toFloat3(rotation_matrix.get_z_basis()); GeometryDataST::Cylinder_Y heliostat(center, radius, half_height, base_x, base_z); - geometry_data.setCylinder_Y(heliostat); } } @@ -263,7 +270,9 @@ GeometryDataST CspElement::toDeviceGeometryData() const Vec3d v2_global = rotation_matrix * v2 + m_origin; Vec3d v3_global = rotation_matrix * v3 + m_origin; - GeometryDataST::Triangle_Flat heliostat(OptixCSP::toFloat3(v1_global), OptixCSP::toFloat3(v2_global), OptixCSP::toFloat3(v3_global)); + GeometryDataST::Triangle_Flat heliostat(OptixCSP::toFloat3(v1_global), + OptixCSP::toFloat3(v2_global), + OptixCSP::toFloat3(v3_global)); geometry_data.setTriangle_Flat(heliostat); } @@ -292,6 +301,47 @@ GeometryDataST CspElement::toDeviceGeometryData() const geometry_data.setQuadrilateral_Flat(heliostat); } + if (aperture_type == ApertureType::CIRCLE) + { + ApertureCircle circ = static_cast(*m_aperture); + float r = circ.get_radius(); + float3 o = OptixCSP::toFloat3(m_origin); + float3 n = normalize(OptixCSP::toFloat3(m_aim_point - m_origin)); + if (surface_type == SurfaceType::FLAT) + { + GeometryDataST::Circle_Flat heliostat(o, n, r); + geometry_data.setCircle_Flat(heliostat); + } + } + + if (aperture_type == ApertureType::HEXAGON) + { + ApertureHexagon hex = static_cast(*m_aperture); + float s = hex.get_side_length(); + float3 o = OptixCSP::toFloat3(m_origin); + float3 n = normalize(OptixCSP::toFloat3(m_aim_point - m_origin)); + if (surface_type == SurfaceType::FLAT) + { + GeometryDataST::Hexagon_Flat hex(o, n, s); + geometry_data.setHexagon_Flat(hex); + } + } + + if (aperture_type == ApertureType::ANNULUS) + { + ApertureAnnulus anf = static_cast(*m_aperture); + float radius_in = anf.get_radius_inner(); + float radius_out = anf.get_radius_outer(); + float arc = anf.get_arc(); + float3 o = OptixCSP::toFloat3(m_origin); + float3 n = normalize(OptixCSP::toFloat3(m_aim_point - m_origin)); + if (surface_type == SurfaceType::FLAT) + { + GeometryDataST::Annulus_Flat anf(o, n, radius_in, radius_out, arc); + geometry_data.setAnnulus_Flat(anf); + } + } + geometry_data.id = this->m_id; return geometry_data; diff --git a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/CspElement.h b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/CspElement.h index 78a0572e..0a89c0cc 100644 --- a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/CspElement.h +++ b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/CspElement.h @@ -26,6 +26,9 @@ namespace OptixCSP // Positioning and orientation. virtual const Vec3d &get_origin() const = 0; virtual void set_origin(const OptixCSP::Vec3d &) = 0; + virtual const Vec3d &get_aim_point() const = 0; + virtual void set_aim_point(const Vec3d &o) = 0; + // virtual const Vec3d& get_euler_angles() const = 0; // virtual void set_euler_angles(const Vec3d&) = 0; @@ -55,6 +58,9 @@ namespace OptixCSP const Vec3d &get_origin() const override; void set_origin(const Vec3d &o) override; + const Vec3d &get_aim_point() const override; + void set_aim_point(const Vec3d &o) override; + std::shared_ptr get_aperture() const; std::shared_ptr get_surface() const; ApertureType get_aperture_type() const; @@ -106,6 +112,7 @@ namespace OptixCSP const OpticalDistribution od); Vec3d m_origin; + Vec3d m_aim_point; Matrix33d m_rotation_matrix; Vec3d m_upper_box_bound; // Global coordinates diff --git a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/data_manager.cpp b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/data_manager.cpp index 9baf1cfe..9ac1344b 100644 --- a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/data_manager.cpp +++ b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/data_manager.cpp @@ -1,10 +1,7 @@ #include "data_manager.h" #include "soltrace_system.h" #include "utils/util_check.hpp" -#include -#include #include -#include #include #include #include "sun_utils.h" @@ -20,7 +17,8 @@ dataManager::dataManager() sun_user_intensity_D(nullptr), sun_user_capacity(0), rng_states_D(nullptr), - rng_states_capacity(0) + rng_states_capacity(0), + depth_exceeded_count_D(nullptr) { // Initialize launch parameters with default values @@ -29,11 +27,12 @@ dataManager::dataManager() launch_params_H.max_depth = 5; launch_params_H.ray_offset = 0; - launch_params_H.hit_point_buffer = nullptr; + // launch_params_H.hit_point_buffer = nullptr; + launch_params_H.hit_buffer = nullptr; launch_params_H.sun_dir_buffer = nullptr; launch_params_H.rng_states = nullptr; - launch_params_H.element_id_buffer = nullptr; - launch_params_H.hit_type_buffer = nullptr; + // launch_params_H.element_id_buffer = nullptr; + // launch_params_H.hit_type_buffer = nullptr; launch_params_H.sun_vector = make_float3(0.0f, 0.0f, 10.0f); launch_params_H.sun_shape = OptixCSP::SunShape::UNKNOWN; launch_params_H.include_sun_shape_errors = false; @@ -58,6 +57,8 @@ dataManager::dataManager() launch_params_H.material_data_array_back = nullptr; launch_params_H.sun_dir_seed = 0ULL; + launch_params_H.d_depth_exceeded_count = nullptr; + launch_params_H.geometry_data_array = nullptr; launch_params_H.handle = OptixTraversableHandle{}; @@ -73,6 +74,11 @@ OptixCSP::LaunchParams *dataManager::getDeviceLaunchParams() const { return laun void dataManager::allocateLaunchParams() { + if (launch_params_D) + { + CUDA_CHECK(cudaFree(launch_params_D)); + launch_params_D = nullptr; + } CUDA_CHECK(cudaMalloc(reinterpret_cast(&launch_params_D), sizeof(LaunchParams))); } @@ -109,8 +115,23 @@ void dataManager::ensureCurandStates( launch_params_H.rng_states = rng_states_D; } +void dataManager::ensureDepthExceededCounter() +{ + if (depth_exceeded_count_D == nullptr) + { + CUDA_CHECK(cudaMalloc(reinterpret_cast(&depth_exceeded_count_D), sizeof(uint64_t))); + CUDA_CHECK(cudaMemset(depth_exceeded_count_D, 0, sizeof(uint64_t))); + launch_params_H.d_depth_exceeded_count = depth_exceeded_count_D; + } +} + void dataManager::allocateGeometryDataArray(std::vector geometry_data_array_H) { + if (geometry_data_array_D) + { + CUDA_CHECK(cudaFree(geometry_data_array_D)); + geometry_data_array_D = nullptr; + } CUDA_CHECK(cudaMalloc(reinterpret_cast(&geometry_data_array_D), geometry_data_array_H.size() * sizeof(GeometryDataST))); @@ -138,6 +159,16 @@ void dataManager::updateGeometryDataArray(std::vector geometry_d void dataManager::allocateMaterialDataArray(std::vector material_data_array_front_H, std::vector material_data_array_back_H) { + if (material_data_array_front_D) + { + CUDA_CHECK(cudaFree(material_data_array_front_D)); + material_data_array_front_D = nullptr; + } + if (material_data_array_back_D) + { + CUDA_CHECK(cudaFree(material_data_array_back_D)); + material_data_array_back_D = nullptr; + } CUDA_CHECK(cudaMalloc(reinterpret_cast(&material_data_array_front_D), material_data_array_front_H.size() * sizeof(MaterialData))); @@ -260,5 +291,11 @@ void dataManager::cleanup() { } launch_params_H.rng_states = nullptr; + if (depth_exceeded_count_D != nullptr) { + CUDA_CHECK(cudaFree(depth_exceeded_count_D)); + depth_exceeded_count_D = nullptr; + } + launch_params_H.d_depth_exceeded_count = nullptr; + launch_params_H.handle = OptixTraversableHandle{}; } diff --git a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/data_manager.h b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/data_manager.h index 7922d132..9d59219e 100644 --- a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/data_manager.h +++ b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/data_manager.h @@ -29,6 +29,9 @@ namespace OptixCSP curandState *rng_states_D; size_t rng_states_capacity; + // Device counter: rays terminated by max depth (not absorption) + uint64_t *depth_exceeded_count_D; + float *sun_user_angle_D; float *sun_user_intensity_D; size_t sun_user_capacity; @@ -68,5 +71,8 @@ namespace OptixCSP unsigned long long seed, unsigned int sequence_offset, cudaStream_t stream); + + /// Allocates the depth-exceeded counter on the device if not already allocated. + void ensureDepthExceededCounter(); }; } diff --git a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/geometry_manager.cpp b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/geometry_manager.cpp index 8369ecb0..118f69b9 100644 --- a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/geometry_manager.cpp +++ b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/geometry_manager.cpp @@ -3,7 +3,6 @@ #include "sun_utils.h" #include "soltrace_state.h" #include "utils/util_check.hpp" -#include "data_manager.h" #include @@ -41,9 +40,56 @@ void GeometryManager::collect_geometry_info(const std::vectorget_aperture_type() == ApertureType::CIRCLE) - // { - // } + if (element->get_aperture_type() == ApertureType::ANNULUS) + { + if (element->get_surface_type() == SurfaceType::FLAT) + { + sbt_offset = static_cast(OpticalEntityType::ANNULUS_FLAT); + } + else + { + std::stringstream ss; + ss << "Unimplemented surface type (" + << static_cast(element->get_surface_type()) + << ") for annular aperture (" + << static_cast(element->get_aperture_type()); + throw std::runtime_error(ss.str()); + } + } + + if (element->get_aperture_type() == ApertureType::CIRCLE) + { + if (element->get_surface_type() == SurfaceType::FLAT) + { + sbt_offset = static_cast(OpticalEntityType::CIRCLE_FLAT); + } + else + { + std::stringstream ss; + ss << "Unimplemented surface type (" + << static_cast(element->get_surface_type()) + << ") for circular aperture (" + << static_cast(element->get_aperture_type()); + throw std::runtime_error(ss.str()); + } + } + + if (element->get_aperture_type() == ApertureType::HEXAGON) + { + if (element->get_surface_type() == SurfaceType::FLAT) + { + sbt_offset = static_cast(OpticalEntityType::HEXAGON_FLAT); + } + else + { + std::stringstream ss; + ss << "Unimplemented surface type (" + << static_cast(element->get_surface_type()) + << ") for hexagon aperture (" + << static_cast(element->get_aperture_type()); + throw std::runtime_error(ss.str()); + } + } if (element->get_aperture_type() == ApertureType::RECTANGLE) { diff --git a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/pipeline_manager.cpp b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/pipeline_manager.cpp index 9985b17c..ade2345d 100644 --- a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/pipeline_manager.cpp +++ b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/pipeline_manager.cpp @@ -42,7 +42,10 @@ const std::map IntersectionKernelMap = { {OpticalEntityType::RECTANGLE_FLAT, "__intersection__rectangle_flat"}, {OpticalEntityType::TRIANGLE_FLAT, "__intersection__triangle_flat"}, {OpticalEntityType::CYLINDRICAL, "__intersection__cylinder_y"}, - {OpticalEntityType::QUADRILATERAL_FLAT, "__intersection__quadrilateral_flat"}}; + {OpticalEntityType::QUADRILATERAL_FLAT, "__intersection__quadrilateral_flat"}, + {OpticalEntityType::CIRCLE_FLAT, "__intersection__circle_flat"}, + {OpticalEntityType::HEXAGON_FLAT, "__intersection__hexagon_flat"}, + {OpticalEntityType::ANNULUS_FLAT, "__intersection__annulus_flat"}}; pipelineManager::pipelineManager(SoltraceState &state) : m_state(state) {} @@ -115,6 +118,7 @@ void pipelineManager::loadModules() // Geometry module. { std::string ptx = loadPtxFromFile("intersection"); + LOG_SIZE = sizeof(LOG); OPTIX_CHECK(optixModuleCreate( m_state.context, &moduleCompileOptions, @@ -127,6 +131,7 @@ void pipelineManager::loadModules() // Shading/materials module. { std::string ptx = loadPtxFromFile("materials"); + LOG_SIZE = sizeof(LOG); OPTIX_CHECK(optixModuleCreate( m_state.context, &moduleCompileOptions, @@ -139,6 +144,7 @@ void pipelineManager::loadModules() // Sun module. { std::string ptx = loadPtxFromFile("sun"); + LOG_SIZE = sizeof(LOG); OPTIX_CHECK(optixModuleCreate( m_state.context, &moduleCompileOptions, @@ -169,10 +175,10 @@ void pipelineManager::createPipeline() // Link program groups to pipeline OptixPipelineLinkOptions pipeline_link_options = {}; - // TODO max trace belong to who? - pipeline_link_options.maxTraceDepth = MAX_TRACE_DEPTH; // Maximum recursion depth for ray tracing. + pipeline_link_options.maxTraceDepth = m_max_trace_depth; // Maximum recursion depth for ray tracing. // Create the OptiX pipeline by linking the program groups. + LOG_SIZE = sizeof(LOG); OPTIX_CHECK(optixPipelineCreate( m_state.context, // OptiX context. &m_state.pipeline_compile_options, // Compile options for the pipeline. @@ -197,7 +203,7 @@ void pipelineManager::createPipeline() // Compute stack sizes based on the maximum trace depth and other settings. OPTIX_CHECK(optixUtilComputeStackSizes( &stack_sizes, // Input stack sizes. - MAX_TRACE_DEPTH, // Maximum trace depth. + m_max_trace_depth, // Maximum trace depth. 0, // maxCCDepth: Maximum depth of continuation callables (none in this case). 0, // maxDCDepth: Maximum depth of direct callables (none in this case). &direct_callable_stack_size_from_traversal, // Output: Stack size for callable traversal. @@ -233,6 +239,7 @@ void pipelineManager::createHitGroupProgram(OptixProgramGroup &group, desc.hitgroup.moduleAH = nullptr; desc.hitgroup.entryFunctionNameAH = nullptr; + LOG_SIZE = sizeof(LOG); OPTIX_CHECK(optixProgramGroupCreate( m_state.context, &desc, @@ -256,7 +263,12 @@ void pipelineManager::createSunProgram() desc.raygen.entryFunctionName = "__raygen__sun_source"; // Create the program group - OPTIX_CHECK_LOG(optixProgramGroupCreate( + // Note: OPTIX_CHECK_LOG is not used here because the macro creates its own + // local LOG_ buffer (not the global LOG), causing it to always print 2048 + // null bytes to stderr. Instead we use OPTIX_CHECK and manually print any + // non-empty log content when verbose mode is enabled. + LOG_SIZE = sizeof(LOG); + OPTIX_CHECK(optixProgramGroupCreate( m_state.context, // OptiX context. &desc, // Descriptor defining the program group. 1, // Number of program groups to create (1 in this case). @@ -264,6 +276,11 @@ void pipelineManager::createSunProgram() LOG, &LOG_SIZE, // Logs to capture diagnostic information. &group // Output: Handle for the created program group. )); + if (LOG_SIZE > 1 && LOG[0] != '\0') + { + std::cerr << "OptiX log for optixProgramGroupCreate (sun):\n" + << std::string(LOG, LOG + LOG_SIZE) << std::endl; + } m_program_groups.push_back(group); m_state.raygen_prog_group = group; @@ -333,6 +350,7 @@ void pipelineManager::createMissProgram() desc.miss.entryFunctionName = "__miss__ms"; // Create the program grou + LOG_SIZE = sizeof(LOG); OPTIX_CHECK(optixProgramGroupCreate( m_state.context, &desc, diff --git a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/pipeline_manager.h b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/pipeline_manager.h index 0d77791b..510b39f8 100644 --- a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/pipeline_manager.h +++ b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/pipeline_manager.h @@ -34,6 +34,8 @@ namespace OptixCSP void set_verbose(bool verbose) { m_verbose = verbose; } + void set_max_trace_depth(uint8_t depth) { m_max_trace_depth = depth; } + /** * @brief Loads PTX code from a file. * @param kernelName The name of the PTX kernel file to load. @@ -112,6 +114,7 @@ namespace OptixCSP std::vector m_program_groups; ///< Stores all created OptiX program groups. std::map m_intersection_program_group_map; ///< Map surface-aperture combinations to index in m_program_groups bool m_verbose = false; + uint8_t m_max_trace_depth = DEFAULT_MAX_TRACE_DEPTH; // // Number of program groups categorized by type. // int num_raygen_programs = 1; ///< Number of ray generation programs. diff --git a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/ray_utils.cu b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/ray_utils.cu new file mode 100644 index 00000000..a685e2c2 --- /dev/null +++ b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/ray_utils.cu @@ -0,0 +1,276 @@ +#include "ray_utils.h" + +#include "shaders/Soltrace.h" +#include "utils/util_check.hpp" + +#include +#include +#include + +#include +#include +#include + +namespace OptixCSP +{ + + // --------------------------------------------------------------------------- + // Pass 1 – count output records per ray. + // + // For each ray the loop reads up to max_depth HitRecord entries. + // raw_count accumulates every valid record (HIT_CREATE through HIT_EXIT). + // A ray with only a HIT_CREATE event (raw_count == 1) contributes 0 output + // records (it missed all elements). Any ray with raw_count > 1 contributes + // all raw_count records (CREATE + one or more hits). + // --------------------------------------------------------------------------- + __global__ static void count_ray_outputs( + const HitRecord *__restrict__ hit_buffer, + uint32_t num_rays, + uint32_t max_depth, + uint8_t *__restrict__ out_record_count, + uint8_t *__restrict__ out_has_hit) + { + const uint32_t ray = blockIdx.x * blockDim.x + threadIdx.x; + if (ray >= num_rays) + return; + + uint32_t raw_count = 0; + for (uint32_t depth = 0; depth < max_depth; ++depth) + { + const uint8_t ht = hit_buffer[max_depth * ray + depth].hit_type; + if (ht < HIT_CREATE || ht > HIT_EXIT) + break; + ++raw_count; + if (ht == HIT_ABSORB || ht == HIT_EXIT) + break; + } + + const uint8_t has_hit = (raw_count > 1) ? 1u : 0u; + out_record_count[ray] = has_hit ? static_cast(raw_count) : 0u; + out_has_hit[ray] = has_hit; + + return; + } + + // --------------------------------------------------------------------------- + // Pass 2 – write compacted records. + // + // Each thread handles one ray. Rays whose entry in the exclusive prefix-sum + // equals that of the next ray (i.e. record_count was 0) write nothing. + // For qualifying rays the CREATE record is written first, followed by all + // subsequent hit records, using the pre-computed offset as the base index. + // --------------------------------------------------------------------------- + __global__ static void compact_ray_outputs( + const HitRecord *__restrict__ hit_buffer, + uint32_t num_rays, + uint32_t max_depth, + const uint64_t *__restrict__ offsets, + const uint8_t *__restrict__ has_hit, + HitRecord *__restrict__ out_buffer) + { + const uint32_t ray = blockIdx.x * blockDim.x + threadIdx.x; + if (ray >= num_rays || !has_hit[ray]) + return; + + const HitRecord *ray_base = hit_buffer + max_depth * ray; + uint64_t out_idx = offsets[ray]; + + // Depth 0 is always HIT_CREATE for qualifying rays + out_buffer[out_idx++] = ray_base[0]; + + for (uint32_t depth = 1; depth < max_depth; ++depth) + { + const HitRecord &hr = ray_base[depth]; + const uint8_t ht = hr.hit_type; + + if (ht < HIT_CREATE || ht > HIT_EXIT) + break; + + out_buffer[out_idx++] = hr; + if (ht == HIT_ABSORB || ht == HIT_EXIT) + break; + } + + return; + } + + // --------------------------------------------------------------------------- + // Host-callable scratch management + // --------------------------------------------------------------------------- + void allocate_compaction_scratch(CompactionScratch &scratch, uint64_t num_rays, uint64_t max_depth) + { + free_compaction_scratch(scratch); + + CUDA_CHECK(cudaMalloc(&scratch.d_count, num_rays * sizeof(uint8_t))); + CUDA_CHECK(cudaMalloc(&scratch.d_offsets, num_rays * sizeof(uint64_t))); + CUDA_CHECK(cudaMalloc(&scratch.d_has_hit, num_rays * sizeof(uint8_t))); + CUDA_CHECK(cudaMalloc(&scratch.d_n_hit, sizeof(uint32_t))); + + // Query CUB temp-storage sizes using typed null pointers (size query only). + // scan_bytes must cover both ExclusiveSum and DeviceSelect::Flagged (d_scan_tmp is reused). + uint32_t *null_u32 = nullptr; + uint8_t *null_u8 = nullptr; + uint64_t *null_u64 = nullptr; + cub::DeviceScan::ExclusiveSum(scratch.d_scan_tmp, scratch.scan_bytes, null_u8, null_u64, num_rays); + cub::DeviceReduce::Sum(scratch.d_red_tmp, scratch.red_bytes, null_u8, null_u32, num_rays); + + size_t select_bytes = 0; + thrust::counting_iterator count_iter(0ull); + cub::DeviceSelect::Flagged(nullptr, select_bytes, count_iter, null_u8, null_u64, null_u32, num_rays); + if (select_bytes > scratch.scan_bytes) + scratch.scan_bytes = select_bytes; + + CUDA_CHECK(cudaMalloc(&scratch.d_scan_tmp, scratch.scan_bytes > 0 ? scratch.scan_bytes : 1)); + CUDA_CHECK(cudaMalloc(&scratch.d_red_tmp, scratch.red_bytes > 0 ? scratch.red_bytes : 1)); + + // Worst-case compacted output: every slot in the hit buffer could be kept + CUDA_CHECK(cudaMalloc(&scratch.d_compacted, num_rays * max_depth * sizeof(HitRecord))); + + // CUDA events for GPU-phase timing + CUDA_CHECK(cudaEventCreate(&scratch.e_gpu1_start)); + CUDA_CHECK(cudaEventCreate(&scratch.e_gpu1_stop)); + CUDA_CHECK(cudaEventCreate(&scratch.e_gpu2_start)); + CUDA_CHECK(cudaEventCreate(&scratch.e_gpu2_stop)); + } + + void free_compaction_scratch(CompactionScratch &scratch) + { + // cudaFree is nullptr-safe + cudaFree(scratch.d_count); + cudaFree(scratch.d_offsets); + cudaFree(scratch.d_has_hit); + cudaFree(scratch.d_n_hit); + cudaFree(scratch.d_scan_tmp); + cudaFree(scratch.d_red_tmp); + cudaFree(scratch.d_compacted); + // cudaEventDestroy is not nullptr-safe + if (scratch.e_gpu1_start) cudaEventDestroy(scratch.e_gpu1_start); + if (scratch.e_gpu1_stop) cudaEventDestroy(scratch.e_gpu1_stop); + if (scratch.e_gpu2_start) cudaEventDestroy(scratch.e_gpu2_start); + if (scratch.e_gpu2_stop) cudaEventDestroy(scratch.e_gpu2_stop); + scratch = CompactionScratch{}; + } + + // --------------------------------------------------------------------------- + // Host-callable orchestrator + // --------------------------------------------------------------------------- + uint32_t gpu_compact_hit_buffer( + const HitRecord *d_hit_buffer, + uint32_t num_rays, + uint32_t max_depth, + uint64_t ray_offset, + std::vector &host_out, + std::vector &host_ray_ids, + cudaStream_t stream, + CompactionScratch &scratch, + CompactionTimings *timings) + { + if (num_rays == 0) + return 0; + + // ---- Pass 1: count records per ray ---- + const uint32_t block_size = 256; + const uint32_t grid_size = (num_rays + block_size - 1) / block_size; + + if (timings) CUDA_CHECK(cudaEventRecord(scratch.e_gpu1_start, stream)); + + count_ray_outputs<<>>( + d_hit_buffer, num_rays, max_depth, scratch.d_count, scratch.d_has_hit); + + // ---- Exclusive prefix-sum: d_count → d_offsets ---- + cub::DeviceScan::ExclusiveSum(scratch.d_scan_tmp, scratch.scan_bytes, scratch.d_count, scratch.d_offsets, num_rays, stream); + + // ---- Reduce: sum(d_has_hit) → d_n_hit ---- + cub::DeviceReduce::Sum(scratch.d_red_tmp, scratch.red_bytes, scratch.d_has_hit, scratch.d_n_hit, num_rays, stream); + + if (timings) CUDA_CHECK(cudaEventRecord(scratch.e_gpu1_stop, stream)); + + // ---- Synchronize to read back scalar results ---- + CUDA_CHECK(cudaStreamSynchronize(stream)); + + if (timings) + { + float ms = 0.f; + CUDA_CHECK(cudaEventElapsedTime(&ms, scratch.e_gpu1_start, scratch.e_gpu1_stop)); + timings->gpu_phase1_ms += ms; + } + + // ---- D→H scalar memcpy (CPU wall-clock) ---- + std::chrono::high_resolution_clock::time_point t_scalar; + if (timings) t_scalar = std::chrono::high_resolution_clock::now(); + + uint64_t last_offset = 0; + uint8_t last_count = 0; + uint32_t n_hit_rays = 0; + CUDA_CHECK(cudaMemcpy(&last_offset, scratch.d_offsets + (num_rays - 1), sizeof(uint64_t), cudaMemcpyDeviceToHost)); + CUDA_CHECK(cudaMemcpy(&last_count, scratch.d_count + (num_rays - 1), sizeof(uint8_t), cudaMemcpyDeviceToHost)); + CUDA_CHECK(cudaMemcpy(&n_hit_rays, scratch.d_n_hit, sizeof(uint32_t), cudaMemcpyDeviceToHost)); + + if (timings) + timings->scalar_dth_ms += std::chrono::duration( + std::chrono::high_resolution_clock::now() - t_scalar).count(); + + const uint64_t total_records = last_offset + last_count; + + if (total_records > 0) + { + // ---- Pass 2: write compacted HitRecords to pre-allocated device buffer ---- + if (timings) CUDA_CHECK(cudaEventRecord(scratch.e_gpu2_start, stream)); + + compact_ray_outputs<<>>( + d_hit_buffer, num_rays, max_depth, scratch.d_offsets, scratch.d_has_hit, scratch.d_compacted); + + // ---- Compact global ray IDs into d_offsets (reused after pass 2) ---- + // DeviceSelect::Flagged selects (ray_offset + i) for each i where d_has_hit[i] == 1. + // d_scan_tmp and d_offsets are both free after compact_ray_outputs completes. + thrust::counting_iterator ray_id_iter(ray_offset); + cub::DeviceSelect::Flagged( + scratch.d_scan_tmp, scratch.scan_bytes, + ray_id_iter, scratch.d_has_hit, + scratch.d_offsets, // output: global IDs of hit rays (uint64_t) + scratch.d_n_hit, // output count (already read; safe to overwrite) + num_rays, stream); + + if (timings) CUDA_CHECK(cudaEventRecord(scratch.e_gpu2_stop, stream)); + + CUDA_CHECK(cudaStreamSynchronize(stream)); + + if (timings) + { + float ms = 0.f; + CUDA_CHECK(cudaEventElapsedTime(&ms, scratch.e_gpu2_start, scratch.e_gpu2_stop)); + timings->gpu_phase2_ms += ms; + } + + // ---- D→H bulk memcpy (CPU wall-clock) ---- + std::chrono::high_resolution_clock::time_point t_bulk; + if (timings) t_bulk = std::chrono::high_resolution_clock::now(); + + // ---- Copy compacted HitRecords to host ---- + const size_t prev_rec = host_out.size(); + host_out.resize(prev_rec + total_records); + CUDA_CHECK(cudaMemcpy( + host_out.data() + prev_rec, + scratch.d_compacted, + total_records * sizeof(HitRecord), + cudaMemcpyDeviceToHost)); + + // ---- Copy global ray IDs to host (one per logical hit ray) ---- + const size_t prev_ids = host_ray_ids.size(); + host_ray_ids.resize(prev_ids + n_hit_rays); + CUDA_CHECK(cudaMemcpy( + host_ray_ids.data() + prev_ids, + scratch.d_offsets, + n_hit_rays * sizeof(uint64_t), + cudaMemcpyDeviceToHost)); + + if (timings) + timings->bulk_dth_ms += std::chrono::duration( + std::chrono::high_resolution_clock::now() - t_bulk).count(); + } + + if (timings) ++timings->n_calls; + return n_hit_rays; + } + +} // namespace OptixCSP diff --git a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/ray_utils.h b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/ray_utils.h new file mode 100644 index 00000000..06f945b4 --- /dev/null +++ b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/ray_utils.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include +#include + +namespace OptixCSP +{ + + struct HitRecord; + + /// Device scratch buffers required by gpu_compact_hit_buffer. + /// Allocated once via allocate_compaction_scratch and reused across calls + /// as long as num_rays and max_depth stay the same. + struct CompactionScratch + { + uint8_t *d_count = nullptr; // per-ray output record count (bounded by max_depth ≤ 255) + uint64_t *d_offsets = nullptr; // exclusive prefix-sum of d_count (pass 1); reused as global ray IDs (pass 2) + uint8_t *d_has_hit = nullptr; // 1 if ray contributes records, else 0 + uint32_t *d_n_hit = nullptr; // scalar: total hit rays + void *d_scan_tmp = nullptr; // CUB DeviceScan temp storage + size_t scan_bytes = 0; + void *d_red_tmp = nullptr; // CUB DeviceReduce temp storage + size_t red_bytes = 0; + HitRecord *d_compacted = nullptr; // worst-case compacted output (num_rays * max_depth) + + // CUDA events for GPU-phase timing (non-null after allocate_compaction_scratch). + cudaEvent_t e_gpu1_start = nullptr; // before count/scan/reduce kernels + cudaEvent_t e_gpu1_stop = nullptr; // after count/scan/reduce kernels + cudaEvent_t e_gpu2_start = nullptr; // before compact/select kernels + cudaEvent_t e_gpu2_stop = nullptr; // after compact/select kernels + }; + + /// Per-call timing breakdown populated by gpu_compact_hit_buffer. + /// All times are accumulated (ms) across calls. Reset to {} at the start of each run(). + struct CompactionTimings + { + float gpu_phase1_ms = 0.f; // count + scan + reduce kernels (GPU time via CUDA events) + float scalar_dth_ms = 0.f; // 3x scalar D->H cudaMemcpy (CPU wall-clock) + float gpu_phase2_ms = 0.f; // compact + select kernels (GPU time via CUDA events) + float bulk_dth_ms = 0.f; // HitRecord + ray-ID bulk D\u2192H (CPU wall-clock) + uint32_t n_calls = 0; // total gpu_compact_hit_buffer invocations + }; + + /// Allocate all device scratch buffers for the given ray-buffer dimensions. + /// Frees any previous allocation before reallocating. + void allocate_compaction_scratch(CompactionScratch &scratch, uint64_t num_rays, uint64_t max_depth); + + /// Free all device scratch buffers and reset the struct to its default state. + void free_compaction_scratch(CompactionScratch &scratch); + + /// GPU-side stream compaction of the raw hit buffer. + /// Uses pre-allocated scratch buffers — no device allocations occur inside this call. + /// Appends compacted HitRecords to @p host_out and the corresponding global ray indices + /// (ray_offset + local_ray_index) to @p host_ray_ids (one entry per logical hit ray). + /// Pass a non-null @p timings to accumulate per-phase timing (GPU events + CPU wall-clock). + /// @returns Number of rays that produced at least one non-CREATE hit. + uint32_t gpu_compact_hit_buffer( + const HitRecord *d_hit_buffer, + uint32_t num_rays, + uint32_t max_depth, + uint64_t ray_offset, + std::vector &host_out, + std::vector &host_ray_ids, + cudaStream_t stream, + CompactionScratch &scratch, + CompactionTimings *timings = nullptr); + +} // namespace OptixCSP diff --git a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/soltrace_system.cpp b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/soltrace_system.cpp index 12301558..ac1840a8 100644 --- a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/soltrace_system.cpp +++ b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/soltrace_system.cpp @@ -1,21 +1,28 @@ +#ifndef NOMINMAX +#define NOMINMAX +#endif + #include "soltrace_system.h" -#include "geometry_manager.h" +#include "ray_utils.h" + +#include "CspElement.h" #include "data_manager.h" +#include "geometry_manager.h" #include "pipeline_manager.h" +#include "soltrace_constants.h" #include "soltrace_type.h" -#include "CspElement.h" #include "timer.h" -#include "soltrace_constants.h" -#include "../../../../../simulation_data/simdata_io.hpp" -#include "../../../../../simulation_data/solar_position_calculators/basic_sun_position.hpp" + +#include "shaders/Soltrace.h" #include "utils/util_record.hpp" #include "utils/util_check.hpp" #include "utils/math_util.h" -#include + +#include #include #include -#include +#include #include #include @@ -28,54 +35,30 @@ using namespace OptixCSP; // note that this is has to be per optical entity type. typedef Record HitGroupRecord; -void SolTraceSystem::set_verbose(bool verbose) -{ - m_verbose = verbose; - geometry_manager->set_verbose(verbose); - pipeline_manager->set_verbose(verbose); -} - -void SolTraceSystem::print_launch_params() -{ - if (!m_verbose) - { - return; - } - - LaunchParams params = data_manager->launch_params_H; - - float3 sun_box_a = params.sun_v0 - params.sun_v1; - float3 sun_box_b = params.sun_v1 - params.sun_v2; - - float sun_box_edge_a = sqrtf(sun_box_a.x * sun_box_a.x + sun_box_a.y * sun_box_a.y + sun_box_a.z * sun_box_a.z); - float sun_box_edge_b = sqrtf(sun_box_b.x * sun_box_b.x + sun_box_b.y * sun_box_b.y + sun_box_b.z * sun_box_b.z); - - std::cout << "print launch params: " << std::endl; - std::cout << "width : " << params.width << std::endl; - std::cout << "height : " << params.height << std::endl; - std::cout << "max_depth : " << params.max_depth << std::endl; - std::cout << "hit_point_buffer : " << params.hit_point_buffer << std::endl; - std::cout << "sun_dir_buffer : " << params.sun_dir_buffer << std::endl; - std::cout << "sun_vector : " << params.sun_vector.x << " " << params.sun_vector.y << " " << params.sun_vector.z << std::endl; - // std::cout << "max_sun_angle : " << params.max_sun_angle << std::endl; - std::cout << "sun_v0 : " << params.sun_v0.x << " " << params.sun_v0.y << " " << params.sun_v0.z << std::endl; - std::cout << "sun_v1 : " << params.sun_v1.x << " " << params.sun_v1.y << " " << params.sun_v1.z << std::endl; - std::cout << "sun_v2 : " << params.sun_v2.x << " " << params.sun_v2.y << " " << params.sun_v2.z << std::endl; - std::cout << "sun_v3 : " << params.sun_v3.x << " " << params.sun_v3.y << " " << params.sun_v3.z << std::endl; - std::cout << "sun_box_edge_a : " << sun_box_edge_a << std::endl; - std::cout << "sun_box_edge_b : " << sun_box_edge_b << std::endl; -} - SolTraceSystem::SolTraceSystem() : m_number_of_rays(0), m_max_number_of_rays(0), + m_batch_size(0), + m_max_ray_depth(DEFAULT_MAX_TRACE_DEPTH), m_verbose(false), m_mem_free_before(0), m_mem_free_after(0), m_optical_errors(false), + m_n_hit_rays(0), + m_n_sun_rays(0), + m_n_depth_exceeded_rays(0), m_include_sun_shape_errors(false), m_timer_setup(), m_timer_trace(), + m_timer_aabb(), + m_timer_geometry(), + m_timer_pipeline(), + m_timer_sbt(), + m_timer_setup_buffer(), + m_timer_optix_launch(), + m_timer_collect_results(), + m_n_run_iterations(0), + m_mem_free_post_setup(0), geometry_manager(std::make_shared(m_state, m_verbose)), data_manager(std::make_shared()), pipeline_manager(std::make_shared(m_state)), @@ -87,9 +70,9 @@ SolTraceSystem::SolTraceSystem() if (m_verbose) { std::cout << "Using OPTIX Version: " << major - << "." << minor - << "." << micro - << std::endl; + << "." << minor + << "." << micro + << std::endl; } CUDA_CHECK(cudaFree(0)); @@ -109,6 +92,63 @@ SolTraceSystem::SolTraceSystem() SolTraceSystem::~SolTraceSystem() { + clean_up(); +} + +void SolTraceSystem::set_max_ray_depth(uint64_t depth) +{ + if (depth < 2) + { + std::cerr << "[OptixRunner] WARNING: max_ray_depth (" << depth + << ") is below the minimum of 2. Clamping to 2.\n"; + depth = 2; + } + else if (depth > 255) + { + std::cerr << "[OptixRunner] WARNING: max_ray_depth (" << depth + << ") exceeds the maximum of 255. Clamping to 255.\n"; + depth = 255; + } + m_max_ray_depth = depth; +} + +void SolTraceSystem::set_verbose(bool verbose) +{ + m_verbose = verbose; + geometry_manager->set_verbose(verbose); + pipeline_manager->set_verbose(verbose); +} + +void SolTraceSystem::print_launch_params() +{ + if (!m_verbose) + { + return; + } + + LaunchParams params = data_manager->launch_params_H; + + float3 sun_box_a = params.sun_v0 - params.sun_v1; + float3 sun_box_b = params.sun_v1 - params.sun_v2; + + float sun_box_edge_a = sqrtf(sun_box_a.x * sun_box_a.x + sun_box_a.y * sun_box_a.y + sun_box_a.z * sun_box_a.z); + float sun_box_edge_b = sqrtf(sun_box_b.x * sun_box_b.x + sun_box_b.y * sun_box_b.y + sun_box_b.z * sun_box_b.z); + + std::cout << "print launch params: " << std::endl; + std::cout << "width : " << params.width << std::endl; + std::cout << "height : " << params.height << std::endl; + std::cout << "max_depth : " << params.max_depth << std::endl; + // std::cout << "hit_point_buffer : " << params.hit_point_buffer << std::endl; + std::cout << "hit_buffer : " << params.hit_buffer << std::endl; + std::cout << "sun_dir_buffer : " << params.sun_dir_buffer << std::endl; + std::cout << "sun_vector : " << params.sun_vector.x << " " << params.sun_vector.y << " " << params.sun_vector.z << std::endl; + // std::cout << "max_sun_angle : " << params.max_sun_angle << std::endl; + std::cout << "sun_v0 : " << params.sun_v0.x << " " << params.sun_v0.y << " " << params.sun_v0.z << std::endl; + std::cout << "sun_v1 : " << params.sun_v1.x << " " << params.sun_v1.y << " " << params.sun_v1.z << std::endl; + std::cout << "sun_v2 : " << params.sun_v2.x << " " << params.sun_v2.y << " " << params.sun_v2.z << std::endl; + std::cout << "sun_v3 : " << params.sun_v3.x << " " << params.sun_v3.y << " " << params.sun_v3.z << std::endl; + std::cout << "sun_box_edge_a : " << sun_box_edge_a << std::endl; + std::cout << "sun_box_edge_b : " << sun_box_edge_b << std::endl; } void SolTraceSystem::initialize() @@ -129,8 +169,18 @@ void SolTraceSystem::initialize() OPTIX_CHECK(optixDeviceContextCreate(cuCtx, &options, &m_state.context)); } - size_t mem_total; - cudaMemGetInfo(&m_mem_free_before, &mem_total); + // Create the CUDA stream immediately after the context so that all subsequent + // GPU work (GAS build, kernel launches, optixLaunch) uses the same named stream. + // Doing this before create_geometries() ensures optixAccelBuild and optixLaunch + // share a single stream, giving explicit serial ordering without relying solely + // on legacy null-stream synchronization semantics. + if (!m_state.stream) + CUDA_CHECK(cudaStreamCreate(&m_state.stream)); + + { + size_t mem_total; + CUDA_CHECK(cudaMemGetInfo(&m_mem_free_before, &mem_total)); + } m_timer_setup.start(); // set up input related to sun @@ -139,24 +189,25 @@ void SolTraceSystem::initialize() sun_vec_norm = glm::normalize(sun_vec_norm); data_manager->launch_params_H.sun_vector = make_float3(static_cast(sun_vec_norm[0]), - static_cast(sun_vec_norm[1]), static_cast(sun_vec_norm[2])); + static_cast(sun_vec_norm[1]), + static_cast(sun_vec_norm[2])); // Set generation type switch (m_sun->get_gen_type()) { - case(SolTrace::Data::GenType::RANDOM): - data_manager->launch_params_H.sun_gen_type = OptixCSP::GenType::RANDOM; - break; - case(SolTrace::Data::GenType::HALTON): - data_manager->launch_params_H.sun_gen_type = OptixCSP::GenType::HALTON; - break; - default: - data_manager->launch_params_H.sun_gen_type = OptixCSP::GenType::UNKNOWN; + case (SolTrace::Data::GenType::RANDOM): + data_manager->launch_params_H.sun_gen_type = OptixCSP::GenType::RANDOM; + break; + case (SolTrace::Data::GenType::HALTON): + data_manager->launch_params_H.sun_gen_type = OptixCSP::GenType::HALTON; + break; + default: + data_manager->launch_params_H.sun_gen_type = OptixCSP::GenType::UNKNOWN; } // Assign sun shape parameters (if necessary) data_manager->launch_params_H.include_sun_shape_errors = this->m_include_sun_shape_errors; - data_manager->allocateSunUserData({}, {}); // Clear sun user data + data_manager->allocateSunUserData({}, {}); // Clear sun user data if (this->m_include_sun_shape_errors) { // Map SolTrace::Data::SunShape to OptixCSP::SunShape for device code @@ -208,136 +259,205 @@ void SolTraceSystem::initialize() data_manager->launch_params_H.sun_max_intensity = static_cast(m_sun->get_max_intensity()); } - Timer AABB_timer; - AABB_timer.start(); + m_timer_aabb.reset(); + m_timer_aabb.start(); geometry_manager->collect_geometry_info(m_element_list, data_manager->launch_params_H); - AABB_timer.stop(); + m_timer_aabb.stop(); - Timer geometry_timer; - geometry_timer.start(); + m_timer_geometry.reset(); + m_timer_geometry.start(); geometry_manager->create_geometries(data_manager->launch_params_H); - geometry_timer.stop(); + m_timer_geometry.stop(); // Pipeline setup. - Timer pipeline_timer; - pipeline_timer.start(); + m_timer_pipeline.reset(); + m_timer_pipeline.start(); + pipeline_manager->set_max_trace_depth(m_max_ray_depth); pipeline_manager->createPipeline(); - pipeline_timer.stop(); + m_timer_pipeline.stop(); - Timer sbt_timer; - sbt_timer.start(); + m_timer_sbt.reset(); + m_timer_sbt.start(); create_shader_binding_table(); - sbt_timer.stop(); + m_timer_sbt.stop(); // seed for randomization data_manager->launch_params_H.sun_dir_seed = m_seed; data_manager->launch_params_H.optical_errors = m_optical_errors; - // Create a CUDA stream for asynchronous operations. - CUDA_CHECK(cudaStreamCreate(&m_state.stream)); - // Link the GAS handle. data_manager->launch_params_H.handle = m_state.gas_handle; data_manager->allocateGeometryDataArray(geometry_manager->get_geometry_data_array()); data_manager->allocateMaterialDataArray(geometry_manager->get_material_data_array_front(), geometry_manager->get_material_data_array_back()); - + if (m_verbose) { - std::cout << "Time to compute AABB: " << AABB_timer.get_time_sec() << " seconds" << std::endl; - std::cout << "Time to create geometries: " << geometry_timer.get_time_sec() << " seconds" << std::endl; - std::cout << "Time to create pipeline: " << pipeline_timer.get_time_sec() << " seconds" << std::endl; - std::cout << "Time to create SBT: " << sbt_timer.get_time_sec() << " seconds" << std::endl; + std::cout << "Time to compute AABB: " << m_timer_aabb.get_time_sec() << " seconds" << std::endl; + std::cout << "Time to create geometries: " << m_timer_geometry.get_time_sec() << " seconds" << std::endl; + std::cout << "Time to create pipeline: " << m_timer_pipeline.get_time_sec() << " seconds" << std::endl; + std::cout << "Time to create SBT: " << m_timer_sbt.get_time_sec() << " seconds" << std::endl; print_launch_params(); } - data_manager->allocateLaunchParams(); + + // Snapshot free GPU memory now that all setup allocations (BVH, pipeline, + // SBT, geometry/material arrays, launch params) are complete but before any + // ray buffers exist. automatic_batch_size() uses this as a stable baseline + // so that batch sizing is consistent across every run() call. + // Memory used by setup = m_mem_free_before - m_mem_free_post_setup. + { + size_t mem_total; + CUDA_CHECK(cudaMemGetInfo(&m_mem_free_post_setup, &mem_total)); + } + m_timer_setup.stop(); } void SolTraceSystem::run() { - - // Initialize results vectors - m_hp_vec.clear(); - m_raynumber_vec.clear(); - m_element_id_vec.clear(); - m_hit_type_vec.clear(); + // Initialize results + m_hit_records.clear(); + m_hit_ray_ids.clear(); + m_n_hit_rays = 0; + m_n_sun_rays = 0; + m_n_depth_exceeded_rays = 0; uint_fast64_t N_ray_hit = 0; uint_fast64_t N_ray_gen = 0; + m_timer_trace.reset(); + m_timer_trace.start(); + m_timer_setup_buffer.reset(); + m_timer_optix_launch.reset(); + m_timer_collect_results.reset(); + m_n_run_iterations = 0; + m_compaction_timings = CompactionTimings{}; + + // Allocate device buffers and initialize RNG states once (sizes are constant across the while loop). + allocate_device_buffers(); + while (N_ray_hit < m_number_of_rays && N_ray_gen < m_max_number_of_rays) { + ++m_n_run_iterations; + // Update ray offset (pushed to device in setup_device_buffer) data_manager->launch_params_H.ray_offset = N_ray_gen; // Allocate buffer (sets data_manager->launch_params_H buffer) - setup_device_buffer(); + m_timer_setup_buffer.start(); + { + setup_device_buffer(); + } + m_timer_setup_buffer.stop(); int width = data_manager->launch_params_H.width; int height = data_manager->launch_params_H.height; - size_t m_mem_free_after; - size_t mem_total; + size_t mem_total; cudaMemGetInfo(&m_mem_free_after, &mem_total); - if(m_verbose) + if (m_verbose) std::cout << "Memory used by launch: " << (m_mem_free_before - m_mem_free_after) / (1024.0 * 1024.0) << " MB\n"; - m_timer_trace.start(); // Launch the simulation. - OPTIX_CHECK(optixLaunch( - m_state.pipeline, - m_state.stream, // Assume this stream is properly created. - reinterpret_cast(data_manager->getDeviceLaunchParams()), - sizeof(OptixCSP::LaunchParams), - &m_state.sbt, // Shader Binding Table. - width, // Launch dimensions - height, - 1)); - CUDA_SYNC_CHECK(); + m_timer_optix_launch.start(); + { + OPTIX_CHECK(optixLaunch( + m_state.pipeline, + m_state.stream, // Assume this stream is properly created. + reinterpret_cast(data_manager->getDeviceLaunchParams()), + sizeof(OptixCSP::LaunchParams), + &m_state.sbt, // Shader Binding Table. + width, // Launch dimensions + height, + 1)); + CUDA_SYNC_CHECK(); + } + m_timer_optix_launch.stop(); // Collect results - get_buffer_results(m_hp_vec, m_raynumber_vec, m_element_id_vec, m_hit_type_vec, - m_sunraynumber_vec); - N_ray_hit = m_raynumber_vec.empty() ? 0 : m_raynumber_vec.back(); + m_timer_collect_results.start(); + { + get_buffer_results(); + } + m_timer_collect_results.stop(); + + // Read back depth-exceeded count for this batch + uint64_t iter_depth_exceeded = 0; + CUDA_CHECK(cudaMemcpy(&iter_depth_exceeded, + data_manager->launch_params_H.d_depth_exceeded_count, + sizeof(uint64_t), cudaMemcpyDeviceToHost)); + m_n_depth_exceeded_rays += iter_depth_exceeded; + + N_ray_hit = m_n_hit_rays; N_ray_gen += width; + m_n_sun_rays = N_ray_gen; } - // Trim excess rays - if (N_ray_hit > m_number_of_rays) + // Trim excess rays: remove ray groups from the tail until m_n_hit_rays == m_number_of_rays. + // Each group starts at the last HIT_CREATE record in m_hit_records. + while (m_trim_excess_rays && m_n_hit_rays > m_number_of_rays && !m_hit_records.empty()) { - while (m_raynumber_vec.back() > m_number_of_rays) - { - m_hp_vec.pop_back(); - m_raynumber_vec.pop_back(); - m_element_id_vec.pop_back(); - m_hit_type_vec.pop_back(); - m_sunraynumber_vec.pop_back(); - } + // Walk backwards to find the last CREATE record + auto rit = std::find_if(m_hit_records.rbegin(), m_hit_records.rend(), + [](const HitRecord &r) + { return r.hit_type == HitType::HIT_CREATE; }); + if (rit == m_hit_records.rend()) + break; + m_hit_records.erase(std::prev(rit.base()), m_hit_records.end()); + m_hit_ray_ids.pop_back(); + --m_n_hit_rays; } + // m_n_sun_rays = rays generated up to and including the last retained hit ray. + if (!m_hit_ray_ids.empty()) + m_n_sun_rays = m_hit_ray_ids.back() + 1; m_timer_trace.stop(); + + if (m_n_depth_exceeded_rays > 0) + std::cout << "[SolTraceSystem] " << m_n_depth_exceeded_rays + << " ray(s) were terminated due to reaching max_depth (" + << static_cast(data_manager->launch_params_H.max_depth) << ").\n"; + + if (m_verbose) + { + const double t_setup = m_timer_setup_buffer.get_time_sec(); + const double t_launch = m_timer_optix_launch.get_time_sec(); + const double t_collect = m_timer_collect_results.get_time_sec(); + const double t_total = t_setup + t_launch + t_collect; + const double inv_n = m_n_run_iterations > 0 ? 1.0 / static_cast(m_n_run_iterations) : 0.0; + + std::cout << "\n--- SolTraceSystem::run() timing (" << m_n_run_iterations << " iteration" + << (m_n_run_iterations == 1 ? "" : "s") << ") ---\n"; + std::cout << std::fixed << std::setprecision(6); + std::cout << " setup_device_buffer : total = " << t_setup << " s" + << " avg = " << t_setup * inv_n << " s" + << " fraction = " << (t_total > 0.0 ? 100.0 * t_setup / t_total : 0.0) << " %\n"; + std::cout << " optixLaunch : total = " << t_launch << " s" + << " avg = " << t_launch * inv_n << " s" + << " fraction = " << (t_total > 0.0 ? 100.0 * t_launch / t_total : 0.0) << " %\n"; + std::cout << " get_buffer_results : total = " << t_collect << " s" + << " avg = " << t_collect * inv_n << " s" + << " fraction = " << (t_total > 0.0 ? 100.0 * t_collect / t_total : 0.0) << " %\n"; + std::cout << " total (3 sections) : " << t_total << " s\n"; + std::cout << "----------------------------------------------\n"; + } } void SolTraceSystem::update() { - const int N_slots = data_manager->launch_params_H.width * data_manager->launch_params_H.height * data_manager->launch_params_H.max_depth; - const size_t hit_point_buffer_size = N_slots * sizeof(float4); - const size_t element_id_size = N_slots * sizeof(int32_t); - const size_t hit_type_buffer_size = N_slots * sizeof(uint8_t); + const size_t N_slots = static_cast(data_manager->launch_params_H.width) * static_cast(data_manager->launch_params_H.height) * static_cast(data_manager->launch_params_H.max_depth); + const size_t hit_buffer_size = N_slots * sizeof(HitRecord); // update aabb and sun plane accordingly geometry_manager->update_geometry_info(m_element_list, data_manager->launch_params_H); // update data on the device data_manager->updateGeometryDataArray(geometry_manager->get_geometry_data_array()); - CUDA_CHECK(cudaMemset(data_manager->launch_params_H.hit_point_buffer, 0, hit_point_buffer_size)); - CUDA_CHECK(cudaMemset(data_manager->launch_params_H.element_id_buffer, kElementIdBuffer, element_id_size)); - CUDA_CHECK(cudaMemset(data_manager->launch_params_H.hit_type_buffer, HitType::HIT_UNASSIGNED, hit_type_buffer_size)); + CUDA_CHECK(cudaMemset(data_manager->launch_params_H.hit_buffer, 0, hit_buffer_size)); data_manager->updateLaunchParams(); } @@ -347,10 +467,25 @@ void SolTraceSystem::get_hp_output(std::vector &hp_vec, std::vector &element_id_vec, std::vector &hit_type_vec) { - hp_vec = m_hp_vec; - raynumber_vec = m_raynumber_vec; - element_id_vec = m_element_id_vec; - hit_type_vec = m_hit_type_vec; + hp_vec.clear(); + raynumber_vec.clear(); + element_id_vec.clear(); + hit_type_vec.clear(); + hp_vec.reserve(m_hit_records.size()); + raynumber_vec.reserve(m_hit_records.size()); + element_id_vec.reserve(m_hit_records.size()); + hit_type_vec.reserve(m_hit_records.size()); + + uint_fast64_t ray_number = 0; + for (const HitRecord &r : m_hit_records) + { + if (r.hit_type == HitType::HIT_CREATE) + ++ray_number; + hp_vec.push_back(r.hit_point); + raynumber_vec.push_back(ray_number); + element_id_vec.push_back(r.element_id); + hit_type_vec.push_back(r.hit_type); + } } void SolTraceSystem::clean_up() @@ -386,28 +521,17 @@ void SolTraceSystem::clean_up() CUDA_CHECK(cudaFree(reinterpret_cast(m_state.d_gas_output_buffer))); // Free device-side launch parameter memory - CUDA_CHECK(cudaFree(reinterpret_cast(data_manager->launch_params_H.hit_point_buffer))); - CUDA_CHECK(cudaFree(reinterpret_cast(data_manager->launch_params_H.element_id_buffer))); - CUDA_CHECK(cudaFree(reinterpret_cast(data_manager->launch_params_H.hit_type_buffer))); + CUDA_CHECK(cudaFree(reinterpret_cast(data_manager->launch_params_H.hit_buffer))); CUDA_CHECK(cudaFree(reinterpret_cast(data_manager->launch_params_H.sun_dir_buffer))); - data_manager->launch_params_H.hit_point_buffer = nullptr; - data_manager->launch_params_H.element_id_buffer = nullptr; - data_manager->launch_params_H.hit_type_buffer = nullptr; + data_manager->launch_params_H.hit_buffer = nullptr; data_manager->launch_params_H.sun_dir_buffer = nullptr; - m_hit_point_buffer_size_allocated = 0; - m_element_id_buffer_size_allocated = 0; - m_hit_type_buffer_size_allocated = 0; + m_hit_buffer_size_allocated = 0; m_sun_dir_buffer_size_allocated = 0; - data_manager->cleanup(); + free_compaction_scratch(m_compaction_scratch); - m_hp_output_buffer_host.clear(); - m_hp_output_buffer_host.shrink_to_fit(); - m_element_id_buffer_host.clear(); - m_element_id_buffer_host.shrink_to_fit(); - m_hit_type_buffer_host.clear(); - m_hit_type_buffer_host.shrink_to_fit(); + data_manager->cleanup(); m_state.context = nullptr; m_state.stream = nullptr; @@ -420,6 +544,10 @@ void SolTraceSystem::clean_up() m_state.gas_handle = 0; m_state.sbt = {}; m_state.d_gas_output_buffer = 0; + + m_mem_free_before = 0; + m_mem_free_post_setup = 0; + m_mem_free_after = 0; } void SolTraceSystem::reset() @@ -427,15 +555,11 @@ void SolTraceSystem::reset() clean_up(); m_element_list.clear(); - m_hp_vec.clear(); - m_raynumber_vec.clear(); - m_element_id_vec.clear(); - m_hit_type_vec.clear(); - m_sunraynumber_vec.clear(); - - m_hp_output_buffer_host.clear(); - m_element_id_buffer_host.clear(); - m_hit_type_buffer_host.clear(); + m_hit_records.clear(); + m_hit_ray_ids.clear(); + m_n_hit_rays = 0; + m_n_sun_rays = 0; + m_n_depth_exceeded_rays = 0; m_sun = nullptr; m_number_of_rays = 0; @@ -447,6 +571,11 @@ void SolTraceSystem::reset() // with their corresponding programs (ray generation, miss, and hit group). void SolTraceSystem::create_shader_binding_table() { + // Free any previously allocated SBT records to avoid leaks on re-initialization. + CUDA_CHECK(cudaFree(reinterpret_cast(m_state.sbt.raygenRecord))); + CUDA_CHECK(cudaFree(reinterpret_cast(m_state.sbt.missRecordBase))); + CUDA_CHECK(cudaFree(reinterpret_cast(m_state.sbt.hitgroupRecordBase))); + m_state.sbt = {}; // Ray generation program record { @@ -512,55 +641,6 @@ void SolTraceSystem::create_shader_binding_table() // initialize program handle and data OptixProgramGroup program_group_handle = pipeline_manager->getElementProgram(my_type); hitgroup_records_list[i].data.material_data = {0.875425, 0, 0, 0}; - // OptixProgramGroup program_group_handle = nullptr; - // SurfaceApertureMap map = {}; - - // switch (my_type) - // { - // case OptixCSP::OpticalEntityType::RECTANGLE_FLAT: - // map = {SurfaceType::FLAT, ApertureType::RECTANGLE}; - // program_group_handle = pipeline_manager->getElementProgram(map); - // hitgroup_records_list[i].data.material_data = {0.875425, 0, 0, 0}; - // printf("RECTANGLE_FLAT, program group address: %p \n", program_group_handle); - - // break; - - // case OptixCSP::OpticalEntityType::RECTANGLE_PARABOLIC: - // map = {SurfaceType::PARABOLIC, ApertureType::RECTANGLE}; - // program_group_handle = pipeline_manager->getElementProgram(map); - // hitgroup_records_list[i].data.material_data = {0.875425, 0, 0, 0}; - // printf("RECTANGLE_PARABOLIC, program group address: %p \n", program_group_handle); - - // break; - - // case OptixCSP::OpticalEntityType::CYLINDRICAL: - // map = {SurfaceType::CYLINDER, ApertureType::RECTANGLE}; - // program_group_handle = pipeline_manager->getElementProgram(map); - // hitgroup_records_list[i].data.material_data = {0.95, 0, 0, 0}; - // printf("CYLINDRICAL, program group address: %p \n", program_group_handle); - - // break; - - // case OptixCSP::OpticalEntityType::TRIANGLE_FLAT: - // map = {SurfaceType::FLAT, ApertureType::TRIANGLE}; - // program_group_handle = pipeline_manager->getElementProgram(map); - // hitgroup_records_list[i].data.material_data = {0.95, 0, 0, 0}; - // printf("FLAT_TRIANGLE, program group address: %p \n", program_group_handle); - - // break; - - // case OptixCSP::OpticalEntityType::QUADRILATERAL_FLAT: - // ma = {SurfaceType::FLAT, ApertureType::QUADRILATERAL}; - // program_group_handle = pipeline_manager->getElementProgram(map); - // hitgroup_records_list[i].data.material_data = {0.875425, 0, 0, 0}; - // printf("FLAT_QUADRILATERAL, program group address: %p \n", program_group_handle); - - // break; - - // default: - // std::cerr << "Unknown OpticalEntityType: " << my_type << std::endl; - // } - OPTIX_CHECK(optixSbtRecordPackHeader(program_group_handle, &hitgroup_records_list[i].header)); } @@ -585,144 +665,84 @@ void SolTraceSystem::create_shader_binding_table() } } -void SolTraceSystem::setup_device_buffer() +void SolTraceSystem::allocate_device_buffers() { - // Initialize launch params - data_manager->launch_params_H.width = m_number_of_rays; + // Set constant launch params (unchanged across the while loop). + const uint_fast64_t effective_batch = determine_batch_size(); + data_manager->launch_params_H.width = static_cast(effective_batch); data_manager->launch_params_H.height = 1; - data_manager->launch_params_H.max_depth = MAX_TRACE_DEPTH; + data_manager->launch_params_H.max_depth = m_max_ray_depth; - const size_t hit_point_buffer_size = data_manager->launch_params_H.width * data_manager->launch_params_H.height * sizeof(float4) * data_manager->launch_params_H.max_depth; - const size_t element_id_size = data_manager->launch_params_H.width * data_manager->launch_params_H.height * sizeof(int32_t) * data_manager->launch_params_H.max_depth; - const size_t hit_type_size = data_manager->launch_params_H.width * data_manager->launch_params_H.height * sizeof(uint8_t) * data_manager->launch_params_H.max_depth; - const size_t sun_dir_size = data_manager->launch_params_H.width * data_manager->launch_params_H.height * sizeof(float3); + const size_t hit_buffer_size = static_cast(data_manager->launch_params_H.width) * static_cast(data_manager->launch_params_H.height) * static_cast(data_manager->launch_params_H.max_depth) * sizeof(HitRecord); + const size_t sun_dir_size = static_cast(data_manager->launch_params_H.width) * static_cast(data_manager->launch_params_H.height) * sizeof(float3); - if (data_manager->launch_params_H.hit_point_buffer == nullptr || m_hit_point_buffer_size_allocated != hit_point_buffer_size) - { - if (data_manager->launch_params_H.hit_point_buffer != nullptr) - CUDA_CHECK(cudaFree(reinterpret_cast(data_manager->launch_params_H.hit_point_buffer))); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&data_manager->launch_params_H.hit_point_buffer), hit_point_buffer_size)); - m_hit_point_buffer_size_allocated = hit_point_buffer_size; - } - CUDA_CHECK(cudaMemset(data_manager->launch_params_H.hit_point_buffer, 0, hit_point_buffer_size)); - - if (data_manager->launch_params_H.element_id_buffer == nullptr || m_element_id_buffer_size_allocated != element_id_size) - { - if (data_manager->launch_params_H.element_id_buffer != nullptr) - CUDA_CHECK(cudaFree(reinterpret_cast(data_manager->launch_params_H.element_id_buffer))); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&data_manager->launch_params_H.element_id_buffer), element_id_size)); - m_element_id_buffer_size_allocated = element_id_size; - } - CUDA_CHECK(cudaMemset(data_manager->launch_params_H.element_id_buffer, kElementIdBuffer, element_id_size)); + // NOTE: cudaFree is nullptr safe - if (data_manager->launch_params_H.hit_type_buffer == nullptr || m_hit_type_buffer_size_allocated != hit_type_size) + if (data_manager->launch_params_H.hit_buffer == nullptr || m_hit_buffer_size_allocated != hit_buffer_size) { - if (data_manager->launch_params_H.hit_type_buffer != nullptr) - CUDA_CHECK(cudaFree(reinterpret_cast(data_manager->launch_params_H.hit_type_buffer))); - CUDA_CHECK(cudaMalloc(reinterpret_cast(&data_manager->launch_params_H.hit_type_buffer), hit_type_size)); - m_hit_type_buffer_size_allocated = hit_type_size; + CUDA_CHECK(cudaFree(reinterpret_cast(data_manager->launch_params_H.hit_buffer))); + CUDA_CHECK(cudaMalloc(reinterpret_cast(&data_manager->launch_params_H.hit_buffer), hit_buffer_size)); + m_hit_buffer_size_allocated = hit_buffer_size; + + // Reallocate compaction scratch whenever ray-buffer dimensions change + const uint64_t num_rays = data_manager->launch_params_H.width * data_manager->launch_params_H.height; + const uint64_t max_depth = static_cast(data_manager->launch_params_H.max_depth); + allocate_compaction_scratch(m_compaction_scratch, num_rays, max_depth); } - CUDA_CHECK(cudaMemset(data_manager->launch_params_H.hit_type_buffer, HitType::HIT_UNASSIGNED, hit_type_size)); if (data_manager->launch_params_H.sun_dir_buffer == nullptr || m_sun_dir_buffer_size_allocated != sun_dir_size) { - if (data_manager->launch_params_H.sun_dir_buffer != nullptr) - CUDA_CHECK(cudaFree(reinterpret_cast(data_manager->launch_params_H.sun_dir_buffer))); + CUDA_CHECK(cudaFree(reinterpret_cast(data_manager->launch_params_H.sun_dir_buffer))); CUDA_CHECK(cudaMalloc(reinterpret_cast(&data_manager->launch_params_H.sun_dir_buffer), sun_dir_size)); m_sun_dir_buffer_size_allocated = sun_dir_size; } - CUDA_CHECK(cudaMemset(data_manager->launch_params_H.sun_dir_buffer, 0, sun_dir_size)); - const unsigned int num_rng_states = static_cast(data_manager->launch_params_H.width * data_manager->launch_params_H.height); + // Initialize RNG states once (sizes are constant across the while loop). + // curand states are persistent on the device and advance naturally across kernel launches. + const unsigned int num_rng_states = static_cast( + data_manager->launch_params_H.width * data_manager->launch_params_H.height); data_manager->ensureCurandStates( num_rng_states, data_manager->launch_params_H.sun_dir_seed, - data_manager->launch_params_H.ray_offset, + 0, m_state.stream); - data_manager->updateLaunchParams(); + data_manager->ensureDepthExceededCounter(); } -// Collects results from device buffer -// only keeps rays that hit elements -void SolTraceSystem::get_buffer_results(std::vector &hp_vec, std::vector &raynumber_vec, - std::vector &element_id_vec, std::vector &hit_type_vec, - std::vector &sunraynumber_vec) +void SolTraceSystem::setup_device_buffer() { - const int max_depth = data_manager->launch_params_H.max_depth; - const int num_rays = data_manager->launch_params_H.width * data_manager->launch_params_H.height; - const int output_size = data_manager->launch_params_H.width * data_manager->launch_params_H.height * data_manager->launch_params_H.max_depth; - - if (static_cast(m_hp_output_buffer_host.size()) != output_size) - m_hp_output_buffer_host.resize(output_size); - if (static_cast(m_element_id_buffer_host.size()) != output_size) - m_element_id_buffer_host.resize(output_size); - if (static_cast(m_hit_type_buffer_host.size()) != output_size) - m_hit_type_buffer_host.resize(output_size); - - CUDA_CHECK(cudaMemcpy(m_hp_output_buffer_host.data(), data_manager->launch_params_H.hit_point_buffer, output_size * sizeof(float4), cudaMemcpyDeviceToHost)); - CUDA_CHECK(cudaMemcpy(m_element_id_buffer_host.data(), data_manager->launch_params_H.element_id_buffer, output_size * sizeof(int32_t), cudaMemcpyDeviceToHost)); - CUDA_CHECK(cudaMemcpy(m_hit_type_buffer_host.data(), data_manager->launch_params_H.hit_type_buffer, output_size * sizeof(uint8_t), cudaMemcpyDeviceToHost)); - - // Loop through each buffer slot - uint_fast64_t ray_number = raynumber_vec.empty() ? 0 : raynumber_vec.back(); - uint_fast64_t sunray_number = sunraynumber_vec.empty() ? 0 : sunraynumber_vec.back(); - for (int i = 0; i < output_size; ++i) - { - - // Get hit type - const uint8_t &hit_type = m_hit_type_buffer_host[i]; - - // Skip if empty - if (hit_type < HitType::HIT_CREATE || hit_type > HitType::HIT_EXIT) - { - continue; - } + const size_t hit_buffer_size = static_cast(data_manager->launch_params_H.width) * static_cast(data_manager->launch_params_H.height) * static_cast(data_manager->launch_params_H.max_depth) * sizeof(HitRecord); + const size_t sun_dir_size = static_cast(data_manager->launch_params_H.width) * static_cast(data_manager->launch_params_H.height) * sizeof(float3); - // If new ray, check if previous ray hit anything - if (hit_type == HitType::HIT_CREATE) - { - // Remove last ray if it has no hits - if (!hit_type_vec.empty() && hit_type_vec.back() == HitType::HIT_CREATE) - { - hp_vec.pop_back(); - raynumber_vec.pop_back(); - hit_type_vec.pop_back(); - element_id_vec.pop_back(); - sunraynumber_vec.pop_back(); - ray_number--; - } - - // New ray - ray_number++; - - // Sun ray number always increments, even if no hit - sunray_number++; - } - - // Get hit record, element_id - const float4 &hit_record = m_hp_output_buffer_host[i]; // [depth, pos x, pos y, pos z] - const int32_t &element_id = m_element_id_buffer_host[i]; - - // Collect results - hp_vec.push_back(hit_record); - raynumber_vec.push_back(ray_number); - hit_type_vec.push_back(hit_type); - element_id_vec.push_back(element_id); - sunraynumber_vec.push_back(sunray_number); - } + CUDA_CHECK(cudaMemset(data_manager->launch_params_H.hit_buffer, 0, hit_buffer_size)); + CUDA_CHECK(cudaMemset(data_manager->launch_params_H.sun_dir_buffer, 0, sun_dir_size)); + CUDA_CHECK(cudaMemset(data_manager->launch_params_H.d_depth_exceeded_count, 0, sizeof(uint64_t))); - // Remove last ray if it is only CREATE - if (!hit_type_vec.empty() && hit_type_vec.back() == HitType::HIT_CREATE) - { - hp_vec.pop_back(); - raynumber_vec.pop_back(); - element_id_vec.pop_back(); - hit_type_vec.pop_back(); - sunraynumber_vec.pop_back(); - } + data_manager->updateLaunchParams(); +} - return; +// Compacts the device hit buffer on the GPU, then copies only qualifying records to host. +// Rays that produced only a HIT_CREATE event (missed all elements) are discarded. +// Empty depth slots are discarded. The compacted HitRecord array is appended to +// m_hit_records and m_n_hit_rays is incremented by the number of newly collected hit rays. +void SolTraceSystem::get_buffer_results() +{ + const uint32_t num_rays = static_cast(data_manager->launch_params_H.width * + data_manager->launch_params_H.height); + const uint32_t max_depth = static_cast(data_manager->launch_params_H.max_depth); + + const uint32_t n_new_hits = gpu_compact_hit_buffer( + data_manager->launch_params_H.hit_buffer, + num_rays, + max_depth, + data_manager->launch_params_H.ray_offset, + m_hit_records, + m_hit_ray_ids, + m_state.stream, + m_compaction_scratch, + &m_compaction_timings); + m_n_hit_rays += n_new_hits; } void SolTraceSystem::add_element(std::shared_ptr e) @@ -740,6 +760,89 @@ double SolTraceSystem::get_time_setup() return m_timer_setup.get_time_sec(); } +void SolTraceSystem::print_timing() const +{ + const double t_setup = m_timer_setup.get_time_sec(); + const double t_aabb = m_timer_aabb.get_time_sec(); + const double t_geometry = m_timer_geometry.get_time_sec(); + const double t_pipeline = m_timer_pipeline.get_time_sec(); + const double t_sbt = m_timer_sbt.get_time_sec(); + + const double t_trace = m_timer_trace.get_time_sec(); + const double t_buf_setup = m_timer_setup_buffer.get_time_sec(); + const double t_launch = m_timer_optix_launch.get_time_sec(); + const double t_collect = m_timer_collect_results.get_time_sec(); + + const double inv_n = m_n_run_iterations > 0 + ? 1.0 / static_cast(m_n_run_iterations) + : 0.0; + + const auto pct = [](double num, double denom) -> double + { + return denom > 0.0 ? 100.0 * num / denom : 0.0; + }; + + std::cout << std::fixed << std::setprecision(6); + std::cout << "\n=== SolTraceSystem Timing Summary ===\n"; + + std::cout << "\n--- initialize() ---\n"; + std::cout << " AABB computation : " << t_aabb << " s (" << pct(t_aabb, t_setup) << " %)\n"; + std::cout << " Geometry creation : " << t_geometry << " s (" << pct(t_geometry, t_setup) << " %)\n"; + std::cout << " Pipeline creation : " << t_pipeline << " s (" << pct(t_pipeline, t_setup) << " %)\n"; + std::cout << " SBT creation : " << t_sbt << " s (" << pct(t_sbt, t_setup) << " %)\n"; + std::cout << " Total setup : " << t_setup << " s\n"; + + std::cout << "\n--- run() [" << m_n_run_iterations + << " iteration" << (m_n_run_iterations == 1 ? "" : "s") << "] ---\n"; + std::cout << " Setup device buffer : total = " << t_buf_setup << " s" + << " avg/iter = " << t_buf_setup * inv_n << " s" + << " (" << pct(t_buf_setup, t_trace) << " %)\n"; + std::cout << " OptiX launch : total = " << t_launch << " s" + << " avg/iter = " << t_launch * inv_n << " s" + << " (" << pct(t_launch, t_trace) << " %)\n"; + std::cout << " Collect results : total = " << t_collect << " s" + << " avg/iter = " << t_collect * inv_n << " s" + << " (" << pct(t_collect, t_trace) << " %)\n"; + if (m_compaction_timings.n_calls > 0) + { + const float inv_c = 1.0f / static_cast(m_compaction_timings.n_calls); + std::cout << std::fixed << std::setprecision(4); + std::cout << " GPU pass 1 (count/scan/reduce) : total = " << m_compaction_timings.gpu_phase1_ms << " ms" + << " avg/call = " << m_compaction_timings.gpu_phase1_ms * inv_c << " ms\n"; + std::cout << " D->H scalars (3x memcpy) : total = " << m_compaction_timings.scalar_dth_ms << " ms" + << " avg/call = " << m_compaction_timings.scalar_dth_ms * inv_c << " ms\n"; + std::cout << " GPU pass 2 (compact/select) : total = " << m_compaction_timings.gpu_phase2_ms << " ms" + << " avg/call = " << m_compaction_timings.gpu_phase2_ms * inv_c << " ms\n"; + std::cout << " D->H bulk (records+ids) : total = " << m_compaction_timings.bulk_dth_ms << " ms" + << " avg/call = " << m_compaction_timings.bulk_dth_ms * inv_c << " ms\n"; + std::cout << std::fixed << std::setprecision(6); + } + std::cout << " Total trace : " << t_trace << " s\n"; + + std::cout << "\n--- Grand Total ---\n"; + std::cout << " Setup + Trace : " << (t_setup + t_trace) << " s\n"; + + std::cout << "\n--- GPU Memory Usage ---\n"; + constexpr double kMB = 1.0 / (1024.0 * 1024.0); + if (m_mem_free_before > 0) + { + std::cout << std::fixed << std::setprecision(2); + std::cout << " Free before setup : " << m_mem_free_before * kMB << " MB\n"; + if (m_mem_free_post_setup > 0) + { + std::cout << " Free after setup : " << m_mem_free_post_setup * kMB << " MB\n"; + std::cout << " Setup structures : " << (m_mem_free_before - m_mem_free_post_setup) * kMB << " MB\n"; + if (m_mem_free_after > 0) + { + std::cout << " Ray buffers : " << (m_mem_free_post_setup - m_mem_free_after) * kMB << " MB\n"; + std::cout << " Total used : " << (m_mem_free_before - m_mem_free_after) * kMB << " MB\n"; + } + } + std::cout << std::fixed << std::setprecision(6); + } + std::cout << "=====================================\n"; +} + double SolTraceSystem::get_sun_plane_area() const { const LaunchParams &lp = data_manager->launch_params_H; @@ -752,3 +855,77 @@ double SolTraceSystem::get_sun_plane_area() const a.x * b.y - a.y * b.x); return static_cast(sqrtf(cross.x * cross.x + cross.y * cross.y + cross.z * cross.z)); } + +uint_fast64_t SolTraceSystem::automatic_batch_size() const +{ + // Use the free-memory snapshot taken at the end of initialize(), after all + // setup allocations (BVH, pipeline, SBT, etc.) but before any ray buffers. + // This gives a stable baseline that does not shrink on subsequent run() calls + // due to the already-allocated (and reused) ray buffers being counted as used. + const size_t mem_free = m_mem_free_post_setup; + + // Reserve 20 % headroom for OptiX internal allocations, memory + // fragmentation, and any other transient allocations during launch. + constexpr double kUsableFraction = 0.80; + const size_t usable_bytes = static_cast( + static_cast(mem_free) * kUsableFraction); + + // Per-ray device memory charged by allocate_device_buffers() and + // allocate_compaction_scratch(): + // hit_buffer DEFAULT_MAX_TRACE_DEPTH * sizeof(HitRecord) -- trace output + // d_compacted DEFAULT_MAX_TRACE_DEPTH * sizeof(HitRecord) -- worst-case compacted copy + // sun_dir_buffer sizeof(float3) -- sun ray direction + // curand states sizeof(curandState) -- RNG state + // d_offsets sizeof(uint64_t) -- compaction prefix sum / global ray IDs + // d_count sizeof(uint8_t) -- compaction hit count (bounded by DEFAULT_MAX_TRACE_DEPTH <= 255) + // d_has_hit sizeof(uint8_t) -- per-ray hit flag + const size_t bytes_per_ray = + 2u * m_max_ray_depth * sizeof(HitRecord) + sizeof(float3) + sizeof(curandState) + sizeof(uint64_t) + 2u * sizeof(uint8_t); + + const uint_fast64_t computed = + (bytes_per_ray > 0) ? static_cast(usable_bytes / bytes_per_ray) : 0u; + + // Cap at int max / m_max_ray_depth (OptiX launch width is signed int). + uint_fast64_t batch_size = std::min( + computed, + static_cast(std::numeric_limits::max() / m_max_ray_depth)); + + if (m_verbose) + { + std::cout << "automatic_batch_size:" + << " free=" << mem_free / (1024.0 * 1024.0) << " MB" + << ", usable=" << usable_bytes / (1024.0 * 1024.0) << " MB" + << ", bytes_per_ray=" << bytes_per_ray + << ", batch_size=" << batch_size << "\n"; + } + + return batch_size; +} + +uint_fast64_t SolTraceSystem::determine_batch_size() const +{ + // Estimates number of rays that can be traced in a single batch based on + // available GPU memory. + uint_fast64_t batch_size = automatic_batch_size(); + + if (m_batch_size > 0) + { + if (m_batch_size > batch_size && batch_size > 0) + { + std::cerr << "[SolTraceSystem] WARNING: user-supplied batch_size (" + << m_batch_size + << ") exceeds the GPU-memory-safe automatic batch size (" + << batch_size + << "). This may cause device out-of-memory errors or " + "degraded GPU performance.\n"; + } + batch_size = m_batch_size; + } + else + { + // Take the smaller of the automatic batch_size and number of rays? + batch_size = batch_size > 0 ? std::min(batch_size, m_number_of_rays) : m_number_of_rays; + } + + return batch_size; +} diff --git a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/soltrace_system.h b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/soltrace_system.h index 9fcbbb4c..0e13fd1a 100644 --- a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/soltrace_system.h +++ b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/soltrace_system.h @@ -1,22 +1,25 @@ #pragma once +#include +#include +#include +#include +#include #include #include -#include -#include -#include - - #include "core/soltrace_state.h" // SoltraceState -#include "core/vec3d.h" // Vec3d +#include "core/vec3d.h" // Vec3d #include "core/timer.h" -#include "core/CspElement.h" // CspElement -#include "core/Surface.h" // Surface and derived classes +#include "core/CspElement.h" // CspElement +#include "core/Surface.h" // Surface and derived classes +#include "shaders/Soltrace.h" // HitRecord, HitType +#include "ray_utils.h" // CompactionScratch #include "../../../../../simulation_data/simulation_data_export.hpp" -namespace OptixCSP { +namespace OptixCSP +{ class GeometryManager; class pipelineManager; @@ -26,17 +29,16 @@ namespace OptixCSP { class Surface; static constexpr SolTrace::Data::SunShape kSupportedSunshapes[] = { - SolTrace::Data::SunShape::GAUSSIAN, - SolTrace::Data::SunShape::PILLBOX, - SolTrace::Data::SunShape::BUIE_CSR, - SolTrace::Data::SunShape::LIMBDARKENED, - SolTrace::Data::SunShape::USER_DEFINED - }; + SolTrace::Data::SunShape::GAUSSIAN, + SolTrace::Data::SunShape::PILLBOX, + SolTrace::Data::SunShape::BUIE_CSR, + SolTrace::Data::SunShape::LIMBDARKENED, + SolTrace::Data::SunShape::USER_DEFINED}; - class SolTraceSystem { + class SolTraceSystem + { public: - SolTraceSystem(); ~SolTraceSystem(); @@ -50,8 +52,8 @@ namespace OptixCSP { void update(); // Get all hit points - void get_hp_output(std::vector& hp_vec, std::vector& raynumber_vec, std::vector& element_id_vec, - std::vector& hit_type_vec); + void get_hp_output(std::vector &hp_vec, std::vector &raynumber_vec, std::vector &element_id_vec, + std::vector &hit_type_vec); /// Explicit cleanup void clean_up(); @@ -66,14 +68,31 @@ namespace OptixCSP { /// /// void set_number_of_rays(uint_fast64_t nrays, uint_fast64_t maxrays) - { + { m_number_of_rays = nrays; m_max_number_of_rays = maxrays; } - void set_sun(SolTrace::Data::Sun* sun) { m_sun = sun; } + /// Set the number of rays launched per iteration. + /// Use 0 (default) to let determine_batch_size() automatically compute a + /// batch size that fits the ray-data buffers in available GPU memory. + /// Throws std::out_of_range if batch_size exceeds the maximum int value. + void set_batch_size(uint_fast64_t batch_size) + { + if (batch_size > static_cast(std::numeric_limits::max())) + throw std::out_of_range("batch_size exceeds std::numeric_limits::max()"); + m_batch_size = batch_size; + } + uint_fast64_t get_batch_size() const { return m_batch_size; } + + /// Set the maximum ray interaction depth. Must be called before initialize(). + /// Values are clamped to [2, 255]. Defaults to DEFAULT_MAX_TRACE_DEPTH = 5. + void set_max_ray_depth(uint64_t depth); + uint8_t get_max_ray_depth() const { return m_max_ray_depth; } + + void set_sun(SolTrace::Data::Sun *sun) { m_sun = sun; } - void set_seed(uint64_t seed) { m_seed = seed; } // Set sun seed + void set_seed(uint64_t seed) { m_seed = seed; } // Set sun seed void set_optical_errors(bool include_optical_errors) { @@ -88,6 +107,10 @@ namespace OptixCSP { double get_time_trace(); double get_time_setup(); + /// Print a formatted summary of all timing information collected during + /// the last initialize() and run() calls. + void print_timing() const; + void print_launch_params(); /// @@ -96,75 +119,117 @@ namespace OptixCSP { /// double get_sun_plane_area() const; - uint_fast64_t get_N_sun_rays() - { - if (m_sunraynumber_vec.empty()) - return 0; - return m_sunraynumber_vec.back(); - } + uint_fast64_t get_N_sun_rays() const { return m_n_sun_rays; } + + /// Returns the number of run() iterations executed during the last run() call. + uint64_t get_N_run_iterations() const { return m_n_run_iterations; } + + /// Returns the compacted hit records (CREATE + hits, misses excluded). + const std::vector &get_hit_records() const { return m_hit_records; } - std::vector get_sunraynumber_vec() const { return m_sunraynumber_vec; } + /// Returns the number of rays that hit at least one element. + uint_fast64_t get_N_hit_rays() const { return m_n_hit_rays; } + + /// Returns the number of rays terminated by max depth (excludes absorption at max depth). + uint_fast64_t get_N_depth_exceeded_rays() const { return m_n_depth_exceeded_rays; } void set_sun_shape_errors(bool flag) { this->m_include_sun_shape_errors = flag; } - + /// Enable or disable trimming excess rays at the end of run() so that + /// exactly m_number_of_rays hit rays are returned. Enabled by default. + void set_trim_excess_rays(bool trim) { m_trim_excess_rays = trim; } + bool get_trim_excess_rays() const { return m_trim_excess_rays; } private: + // m_verbose and m_state must be declared before the shared_ptr managers so + // that they are initialized first (C++ initializes members in declaration order). + // GeometryManager and pipelineManager store references/copies of these at + // construction time, so they must be valid when the shared_ptrs are built. + bool m_verbose = false; + OptixCSP::SoltraceState m_state; std::shared_ptr geometry_manager; std::shared_ptr pipeline_manager; - std::shared_ptr data_manager; + std::shared_ptr data_manager; uint_fast64_t m_number_of_rays; uint_fast64_t m_max_number_of_rays; - - bool m_verbose; + uint_fast64_t m_batch_size = 0; // 0 means auto-size: determine_batch_size() calls automatic_batch_size() + uint8_t m_max_ray_depth = DEFAULT_MAX_TRACE_DEPTH; + uint_fast64_t m_n_depth_exceeded_rays = 0; // rays stopped by max depth, not absorption // Sun - //OptixCSP::Vec3d m_sun_vector; - //double m_sun_angle; - - SolTrace::Data::Sun* m_sun; - bool m_include_sun_shape_errors = false; + // OptixCSP::Vec3d m_sun_vector; + // double m_sun_angle; + SolTrace::Data::Sun *m_sun; + bool m_include_sun_shape_errors = false; + bool m_trim_excess_rays = true; uint64_t m_seed = 123456ULL; - bool m_optical_errors; - OptixCSP::SoltraceState m_state; + bool m_optical_errors = false; // Results - // Contains information on rays that hit objects - std::vector m_hp_vec; - std::vector m_raynumber_vec; - std::vector m_element_id_vec; - std::vector m_hit_type_vec; - std::vector m_sunraynumber_vec; // This is ID of hit rays out of all generated rays + // Compacted hit records: one contiguous array of HitRecord. + // Each ray group starts with a HIT_CREATE record followed by its hits. + // Rays that produced no hits (CREATE-only) are excluded. + std::vector m_hit_records; - // Reused host-side scratch buffers for copying launch results back from device. - std::vector m_hp_output_buffer_host; - std::vector m_element_id_buffer_host; - std::vector m_hit_type_buffer_host; + // Global ray index (ray_offset + local_index) for each logical hit ray in m_hit_records. + // Parallel to the logical rays (not records): m_hit_ray_ids.size() == m_n_hit_rays. + std::vector m_hit_ray_ids; + + // Count of rays that produced at least one non-CREATE hit. + uint_fast64_t m_n_hit_rays = 0; + + // Total rays generated (launched from the sun plane) across all run() iterations. + uint_fast64_t m_n_sun_rays = 0; // Current allocated device launch buffer sizes. - size_t m_hit_point_buffer_size_allocated = 0; - size_t m_element_id_buffer_size_allocated = 0; - size_t m_hit_type_buffer_size_allocated = 0; + size_t m_hit_buffer_size_allocated = 0; size_t m_sun_dir_buffer_size_allocated = 0; + // Pre-allocated device scratch buffers for GPU stream compaction. + CompactionScratch m_compaction_scratch; + CompactionTimings m_compaction_timings; + std::vector> m_element_list; void create_shader_binding_table(); + void allocate_device_buffers(); void setup_device_buffer(); - void get_buffer_results(std::vector& hp_vec, std::vector& raynumber_vec, - std::vector& element_id_vec, std::vector& hit_type_vec, - std::vector& sunraynumber_vec); + // GPU-side compaction: count hits, compact buffer on device, copy result to m_hit_records. + // Increments m_n_hit_rays by the number of newly collected hit rays. + void get_buffer_results(); + /// Computes the maximum rays-per-batch that fit in 80 % of current free + /// GPU memory, accounting for all per-ray device buffers and compaction + /// scratch. Returns 0 if memory cannot be queried. + uint_fast64_t automatic_batch_size() const; + /// Returns the effective batch size for a run() call. + /// If m_batch_size > 0 the user-supplied value is used as-is. + /// Otherwise automatic_batch_size() is called and the result is capped + /// at m_number_of_rays. + uint_fast64_t determine_batch_size() const; Timer m_timer_setup; Timer m_timer_trace; - // memory usage - size_t m_mem_free_before; - size_t m_mem_free_after; + // initialize() sub-timers + Timer m_timer_aabb; + Timer m_timer_geometry; + Timer m_timer_pipeline; + Timer m_timer_sbt; + // run() sub-timers + Timer m_timer_setup_buffer; + Timer m_timer_optix_launch; + Timer m_timer_collect_results; + uint64_t m_n_run_iterations; + // memory usage + size_t m_mem_free_before; ///< Free GPU memory at the start of initialize(), before any setup allocations. + size_t m_mem_free_post_setup; ///< Free GPU memory at the end of initialize(), after all setup allocations (BVH, + /// pipeline, SBT, geometry/material arrays). Used as the baseline in + /// automatic_batch_size() so batch sizing is stable across run() calls. + size_t m_mem_free_after; ///< Free GPU memory sampled during run() for per-launch memory reporting. }; } diff --git a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/soltrace_type.h b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/soltrace_type.h index e10ad3cc..d98569a8 100644 --- a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/soltrace_type.h +++ b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/soltrace_type.h @@ -10,7 +10,9 @@ namespace OptixCSP RECTANGLE, CIRCLE, TRIANGLE, - QUADRILATERAL + QUADRILATERAL, + HEXAGON, + ANNULUS }; // types for both scene building and pipeline assembly diff --git a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/sun_utils.cu b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/sun_utils.cu index 739c2075..798b288d 100644 --- a/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/sun_utils.cu +++ b/coretrace/simulation_runner/optix_runner/OptixCSP/src/core/sun_utils.cu @@ -6,7 +6,6 @@ #include "shaders/Soltrace.h" #include -#include namespace OptixCSP { diff --git a/coretrace/simulation_runner/optix_runner/OptixCSP/src/shaders/GeometryDataST.h b/coretrace/simulation_runner/optix_runner/OptixCSP/src/shaders/GeometryDataST.h index 5abd5009..217e64dd 100644 --- a/coretrace/simulation_runner/optix_runner/OptixCSP/src/shaders/GeometryDataST.h +++ b/coretrace/simulation_runner/optix_runner/OptixCSP/src/shaders/GeometryDataST.h @@ -8,10 +8,11 @@ #define assert(x) /*nop*/ #endif -// TODO: get rid of ST suffix, no clue what it was for ... +// TODO: get rid of ST suffix, no clue what it was for ... -namespace OptixCSP { - struct GeometryDataST +namespace OptixCSP +{ + struct GeometryDataST { enum Type { @@ -20,20 +21,21 @@ namespace OptixCSP { RECTANGLE_PARABOLIC = 2, UNKNOWN_TYPE = 3, RECTANGLE_FLAT = 4, - TRIANGLE_FLAT = 5, - QUADRILATERAL_FLAT = 6 + TRIANGLE_FLAT = 5, + QUADRILATERAL_FLAT = 6, + CIRCLE_FLAT = 7, + HEXAGON_FLAT = 8, + ANNULUS_FLAT = 9 }; struct Parallelogram { Parallelogram() = default; Parallelogram(float3 v1, float3 v2, float3 anchor) - : v1(v1) - , v2(v2) - , anchor(anchor) + : v1(v1), v2(v2), anchor(anchor) { float3 normal = normalize(cross(v1, v2)); - float d = dot(normal, anchor); + float d = dot(normal, anchor); this->v1 *= 1.0f / dot(v1, v1); this->v2 *= 1.0f / dot(v2, v2); plane = make_float4(normal, d); @@ -52,7 +54,7 @@ namespace OptixCSP { : center(center), x(x), y(y), width(width), height(height) { float3 normal = normalize(cross(x, y)); - float d = dot(normal, center); + float d = dot(normal, center); plane = make_float4(normal, d); } @@ -64,34 +66,28 @@ namespace OptixCSP { float height; }; - struct Cylinder_Y { + struct Cylinder_Y + { Cylinder_Y() = default; Cylinder_Y(float3 center, float radius, float half_height, float3 base_x, float3 base_z) - : center(center) - , radius(radius) - , half_height(half_height) - , base_x(base_x) - , base_z(base_z) { + : center(center), radius(radius), half_height(half_height), base_x(base_x), base_z(base_z) + { assert(dot(base_x, base_z) < 1e-3f); } - float3 center; float radius; float half_height; - float3 base_x; // x axis of the cylinder - float3 base_z; // z axis of the cylinder + float3 base_x; // x axis of the cylinder + float3 base_z; // z axis of the cylinder }; - struct Rectangle_Parabolic { + struct Rectangle_Parabolic + { Rectangle_Parabolic() = default; Rectangle_Parabolic(float3 v1, float3 v2, float3 anchor, float curv_x, float curv_y) - : v1(v1) - , v2(v2) - , anchor(anchor) - , curv_x(curv_x) - , curv_y(curv_y) + : v1(v1), v2(v2), anchor(anchor), curv_x(curv_x), curv_y(curv_y) { float3 normal = normalize(cross(v1, v2)); float d = dot(normal, anchor); @@ -104,14 +100,15 @@ namespace OptixCSP { float3 v1; float3 v2; float3 anchor; - //float3 focus; + // float3 focus; float curv_x; float curv_y; }; - struct Triangle_Flat { + struct Triangle_Flat + { Triangle_Flat() = default; - Triangle_Flat(const float3& a, const float3& b, const float3& c) + Triangle_Flat(const float3 &a, const float3 &b, const float3 &c) : v0(a), e1(b - a), e2(c - a) { normal = normalize(cross(e1, e2)); @@ -120,10 +117,11 @@ namespace OptixCSP { float3 v0; // base vertex float3 e1, e2; // edges float3 normal; - float d; // plane distance + float d; // plane distance }; - struct Quadrilateral_Flat{ + struct Quadrilateral_Flat + { Quadrilateral_Flat() = default; Quadrilateral_Flat(const float3 &a, const float3 &b, const float3 &c, const float3 &d) @@ -134,75 +132,118 @@ namespace OptixCSP { normal = normalize(cross(e1, e2)); } float3 p0, p1, p2, p3; // Vertices in counterclockwise order - float3 normal; // Positive direction follows right-hand rule + float3 normal; // Positive direction follows right-hand rule + }; + + struct Circle_Flat + { + Circle_Flat() = default; + // Circle_Flat(const float radius) : r(radius) {} + Circle_Flat(const float3 &origin, const float3 &normal, const float &radius) + : r(radius), center(origin) + { + plane = make_float4(normalize(normal), dot(center, normal)); + } + float4 plane; + float3 center; + float r; + }; + + struct Hexagon_Flat + { + Hexagon_Flat() = default; + Hexagon_Flat(const float3 &origin, const float3 &normal, const float &side_length) + : s(side_length), center(origin) + { + plane = make_float4(normalize(normal), dot(center, normal)); + } + float4 plane; + float3 center; + float s; + }; + + struct Annulus_Flat + { + Annulus_Flat() = default; + Annulus_Flat(const float3 &origin, const float3 &normal, + const float &r_inner, const float &r_outer, const float &arc) + : center(origin), ri(r_inner), ro(r_outer), arc(arc) + { + plane = make_float4(normalize(normal), dot(center, normal)); + } + float4 plane; + float3 center; + float ri; + float ro; + float arc; // Arc angle in radians with x-axis in the middle }; GeometryDataST() = default; - void setParallelogram(const Parallelogram& p) + void setParallelogram(const Parallelogram &p) { assert(type == UNKNOWN_TYPE); type = PARALLELOGRAM; parallelogram = p; } - __host__ __device__ const Parallelogram& getParallelogram() const + __host__ __device__ const Parallelogram &getParallelogram() const { assert(type == PARALLELOGRAM); return parallelogram; } - void setRectangle_Flat(const Rectangle_Flat& r) + void setRectangle_Flat(const Rectangle_Flat &r) { assert(type == UNKNOWN_TYPE); type = RECTANGLE_FLAT; rectangle_flat = r; } - __host__ __device__ const Rectangle_Flat& getRectangle_Flat() const + __host__ __device__ const Rectangle_Flat &getRectangle_Flat() const { assert(type == RECTANGLE_FLAT); return rectangle_flat; } - void setCylinder_Y(const Cylinder_Y& c) + void setCylinder_Y(const Cylinder_Y &c) { assert(type == UNKNOWN_TYPE); type = CYLINDER_Y; cylinder_y = c; } - __host__ __device__ const Cylinder_Y& getCylinder_Y() const + __host__ __device__ const Cylinder_Y &getCylinder_Y() const { assert(type == CYLINDER_Y); return cylinder_y; } - void setRectangleParabolic(const Rectangle_Parabolic& r) + void setRectangleParabolic(const Rectangle_Parabolic &r) { assert(type == UNKNOWN_TYPE); type = RECTANGLE_PARABOLIC; rectangle_parabolic = r; } - __host__ __device__ const Rectangle_Parabolic& getRectangleParabolic() const + __host__ __device__ const Rectangle_Parabolic &getRectangleParabolic() const { assert(type == RECTANGLE_PARABOLIC); return rectangle_parabolic; } - void setTriangle_Flat(const Triangle_Flat& t) + void setTriangle_Flat(const Triangle_Flat &t) { assert(type == UNKNOWN_TYPE); type = TRIANGLE_FLAT; triangle_flat = t; - } + } - __host__ __device__ const Triangle_Flat& getTriangle_Flat() const + __host__ __device__ const Triangle_Flat &getTriangle_Flat() const { assert(type == TRIANGLE_FLAT); return triangle_flat; - } + } void setQuadrilateral_Flat(const Quadrilateral_Flat &q) { @@ -211,12 +252,50 @@ namespace OptixCSP { quadrilateral_flat = q; } - __host__ __device__ const Quadrilateral_Flat& getQuadrilateral_Flat() const + __host__ __device__ const Quadrilateral_Flat &getQuadrilateral_Flat() const { assert(type == QUADRILATERAL_FLAT); return quadrilateral_flat; } + void setCircle_Flat(const Circle_Flat &c) + { + assert(type == UNKNOWN_TYPE); + type = CIRCLE_FLAT; + circle_flat = c; + } + + __host__ __device__ const Circle_Flat &getCircle_Flat() const + { + assert(type == CIRCLE_FLAT); + return circle_flat; + } + + void setHexagon_Flat(const Hexagon_Flat &h) + { + assert(type == UNKNOWN_TYPE); + type = HEXAGON_FLAT; + hexagon_flat = h; + } + + __host__ __device__ const Hexagon_Flat &getHexagon_Flat() const + { + assert(type == HEXAGON_FLAT); + return hexagon_flat; + } + + void setAnnulus_Flat(const Annulus_Flat &anf) + { + assert(type == UNKNOWN_TYPE); + type = ANNULUS_FLAT; + annulus_flat = anf; + } + + __host__ __device__ const Annulus_Flat &getAnnulus_Flat() const + { + assert(type == ANNULUS_FLAT); + return annulus_flat; + } Type type = UNKNOWN_TYPE; @@ -229,8 +308,11 @@ namespace OptixCSP { Cylinder_Y cylinder_y; Rectangle_Parabolic rectangle_parabolic; Rectangle_Flat rectangle_flat; - Triangle_Flat triangle_flat; + Triangle_Flat triangle_flat; Quadrilateral_Flat quadrilateral_flat; + Circle_Flat circle_flat; + Hexagon_Flat hexagon_flat; + Annulus_Flat annulus_flat; }; }; } diff --git a/coretrace/simulation_runner/optix_runner/OptixCSP/src/shaders/Soltrace.h b/coretrace/simulation_runner/optix_runner/OptixCSP/src/shaders/Soltrace.h index 42e3838b..1c4c87ab 100644 --- a/coretrace/simulation_runner/optix_runner/OptixCSP/src/shaders/Soltrace.h +++ b/coretrace/simulation_runner/optix_runner/OptixCSP/src/shaders/Soltrace.h @@ -4,6 +4,7 @@ #include "MaterialDataST.h" #include "soltrace_constants.h" +#include #include #include #include @@ -12,13 +13,22 @@ namespace OptixCSP{ const unsigned int NUM_ATTRIBUTE_VALUES = 4u; const unsigned int NUM_PAYLOAD_VALUES = 2u; - const unsigned int MAX_TRACE_DEPTH = 5u; + // NOTE: Maximum number of ray interactions in tracing with the geometry is + // DEFAULT_MAX_TRACE_DEPTH - 1 (so currently 4). See the end of the function + // __closesthit__element in materials.cu. Note the type. Limited to 255. + const uint8_t DEFAULT_MAX_TRACE_DEPTH = 5u; struct HitGroupData { MaterialData material_data; }; + struct HitRecord { + float4 hit_point; + int32_t element_id; + uint8_t hit_type; + }; + enum RayType { RAY_TYPE_RADIANCE = 0, @@ -31,6 +41,9 @@ namespace OptixCSP{ CYLINDRICAL = 2, TRIANGLE_FLAT = 3, QUADRILATERAL_FLAT = 4, + CIRCLE_FLAT = 5, + HEXAGON_FLAT = 6, + ANNULUS_FLAT = 7, NUM_OPTICAL_ENTITY_TYPES }; @@ -40,15 +53,18 @@ namespace OptixCSP{ unsigned int width; // essentially number of rays launched and sun points unsigned int height; - int max_depth; - unsigned int ray_offset; // Global offset for current branch + unsigned int max_depth; + unsigned long long ray_offset; // Global offset for current branch - float4* hit_point_buffer; + // float4* hit_point_buffer; + HitRecord* hit_buffer; float3* sun_dir_buffer; curandState* rng_states; OptixTraversableHandle handle; - int32_t* element_id_buffer; - uint8_t* hit_type_buffer; + // int32_t* element_id_buffer; + // uint8_t* hit_type_buffer; + uint64_t* d_depth_exceeded_count; // Atomic counter: rays stopped by max depth, not absorption + float3 sun_vector; bool include_sun_shape_errors; diff --git a/coretrace/simulation_runner/optix_runner/OptixCSP/src/shaders/intersection.cu b/coretrace/simulation_runner/optix_runner/OptixCSP/src/shaders/intersection.cu index 6b25206d..b9688d39 100644 --- a/coretrace/simulation_runner/optix_runner/OptixCSP/src/shaders/intersection.cu +++ b/coretrace/simulation_runner/optix_runner/OptixCSP/src/shaders/intersection.cu @@ -1,46 +1,54 @@ #include -//#include +// #include #include "Soltrace.h" #include #include "GeometryDataST.h" -extern "C" { +extern "C" +{ __constant__ OptixCSP::LaunchParams params; } +extern "C" __device__ __inline__ float ray_distance_to_plane(float3 ro, float3 rd, float4 plane) +{ + const float3 n = make_float3(plane); + return (plane.w - dot(n, ro)) / dot(rd, n); +} extern "C" __global__ void __intersection__parallelogram() { - int i = optixGetPrimitiveIndex(); - const OptixCSP::GeometryDataST::Parallelogram& parallelogram = params.geometry_data_array[i].getParallelogram(); - + int i = optixGetPrimitiveIndex(); + const OptixCSP::GeometryDataST::Parallelogram ¶llelogram = params.geometry_data_array[i].getParallelogram(); + // Get ray information: origin, direction, and min/max distances over which ray should be tested const float3 ray_orig = optixGetWorldRayOrigin(); - const float3 ray_dir = optixGetWorldRayDirection(); - const float ray_tmin = optixGetRayTmin(), ray_tmax = optixGetRayTmax(); + const float3 ray_dir = optixGetWorldRayDirection(); + const float ray_tmin = optixGetRayTmin(), ray_tmax = optixGetRayTmax(); - // Compute ray intersection point - float3 n = make_float3( parallelogram.plane ); - float dt = dot( ray_dir, n ); - // Compute distance t (point of intersection) along ray direction from ray origin - float t = ( parallelogram.plane.w - dot( n, ray_orig ) ) / dt; + // // Compute ray intersection point + // float3 n = make_float3( parallelogram.plane ); + // float dt = dot( ray_dir, n ); + // // Compute distance t (point of intersection) along ray direction from ray origin + // float t = ( parallelogram.plane.w - dot( n, ray_orig ) ) / dt; + float t = ray_distance_to_plane(ray_orig, ray_dir, parallelogram.plane); + const float4 n = parallelogram.plane; // Verify intersection distance and Report ray intersection point - if( t > ray_tmin && t < ray_tmax ) + if (t > ray_tmin && t < ray_tmax) { - float3 p = ray_orig + ray_dir * t; + float3 p = ray_orig + ray_dir * t; float3 vi = p - parallelogram.anchor; - float a1 = dot( parallelogram.v1, vi ); - if( a1 >= 0 && a1 <= 1 ) + float a1 = dot(parallelogram.v1, vi); + if (a1 >= 0 && a1 <= 1) { - float a2 = dot( parallelogram.v2, vi ); - if( a2 >= 0 && a2 <= 1 ) + float a2 = dot(parallelogram.v2, vi); + if (a2 >= 0 && a2 <= 1) { - optixReportIntersection( t, - 0, - __float_as_uint( n.x ), - __float_as_uint( n.y ), - __float_as_uint( n.z )); + optixReportIntersection(t, + 0, + __float_as_uint(n.x), + __float_as_uint(n.y), + __float_as_uint(n.z)); } } } @@ -49,61 +57,63 @@ extern "C" __global__ void __intersection__parallelogram() extern "C" __global__ void __intersection__rectangle_flat() { - const OptixCSP::GeometryDataST::Rectangle_Flat& rectangle = params.geometry_data_array[optixGetPrimitiveIndex()].getRectangle_Flat(); - + const OptixCSP::GeometryDataST::Rectangle_Flat &rectangle = params.geometry_data_array[optixGetPrimitiveIndex()].getRectangle_Flat(); + const float3 ray_orig = optixGetWorldRayOrigin(); const float3 ray_dir = optixGetWorldRayDirection(); const float ray_tmin = optixGetRayTmin(); const float ray_tmax = optixGetRayTmax(); - // Get plane normal and distance - float3 n = make_float3(rectangle.plane); - float dt = dot(ray_dir, n); - - // Compute distance t (point of intersection) along ray direction from ray origin - float t = (rectangle.plane.w - dot(n, ray_orig)) / dt; + // // Get plane normal and distance + // float3 n = make_float3(rectangle.plane); + // float dt = dot(ray_dir, n); + + // // Compute distance t (point of intersection) along ray direction from ray origin + // float t = (rectangle.plane.w - dot(n, ray_orig)) / dt; + float t = ray_distance_to_plane(ray_orig, ray_dir, rectangle.plane); + const float4 n = rectangle.plane; // Verify intersection distance if (t > ray_tmin && t < ray_tmax) { // Compute intersection point float3 p = ray_orig + ray_dir * t; - + // Compute vector from center to intersection point float3 v = p - rectangle.center; - + // Project onto x and y to get local coordinates float x = dot(rectangle.x, v); float y = dot(rectangle.y, v); - + // Check if point is within rectangle bounds - if (x >= -rectangle.width/2 && x <= rectangle.width/2 && - y >= -rectangle.height/2 && y <= rectangle.height/2) + if (x >= -rectangle.width / 2 && x <= rectangle.width / 2 && + y >= -rectangle.height / 2 && y <= rectangle.height / 2) { optixReportIntersection(t, - 0, - __float_as_uint(n.x), - __float_as_uint(n.y), - __float_as_uint(n.z)); + 0, + __float_as_uint(n.x), + __float_as_uint(n.y), + __float_as_uint(n.z)); } } } extern "C" __global__ void __intersection__cylinder_y() { - const OptixCSP::GeometryDataST::Cylinder_Y& cyl = params.geometry_data_array[optixGetPrimitiveIndex()].getCylinder_Y(); + const OptixCSP::GeometryDataST::Cylinder_Y &cyl = params.geometry_data_array[optixGetPrimitiveIndex()].getCylinder_Y(); // Get ray information: origin, direction, and min/max distances over which ray should be tested const float3 ray_orig = optixGetWorldRayOrigin(); const float3 ray_dir = normalize(optixGetWorldRayDirection()); - const float ray_tmin = optixGetRayTmin(); - const float ray_tmax = optixGetRayTmax(); + const float ray_tmin = optixGetRayTmin(); + const float ray_tmax = optixGetRayTmax(); // Transform ray to the cylinder's local coordinate system float3 local_ray_orig = ray_orig - cyl.center; float3 local_ray_dir = ray_dir; - // TODO: check how to optimize this, there should be a way in optix to rotate coordinates + // TODO: check how to optimize this, there should be a way in optix to rotate coordinates float3 local_x = cyl.base_x; float3 local_z = cyl.base_z; float3 local_y = cross(local_z, local_x); @@ -111,15 +121,13 @@ extern "C" __global__ void __intersection__cylinder_y() local_ray_orig = make_float3( dot(local_ray_orig, local_x), dot(local_ray_orig, local_y), - dot(local_ray_orig, local_z) - ); + dot(local_ray_orig, local_z)); local_ray_dir = make_float3( dot(local_ray_dir, local_x), dot(local_ray_dir, local_y), - dot(local_ray_dir, local_z) - ); + dot(local_ray_dir, local_z)); - // solve quadratic equation for intersection + // solve quadratic equation for intersection float A = local_ray_dir.x * local_ray_dir.x + local_ray_dir.z * local_ray_dir.z; float B = 2.0f * (local_ray_orig.x * local_ray_dir.x + local_ray_orig.z * local_ray_dir.z); float C = local_ray_orig.x * local_ray_orig.x + local_ray_orig.z * local_ray_orig.z - cyl.radius * cyl.radius; @@ -169,23 +177,23 @@ extern "C" __global__ void __intersection__cylinder_y() // Report intersection to OptiX optixReportIntersection(t, - 0, - __float_as_uint(world_normal.x), - __float_as_uint(world_normal.y), - __float_as_uint(world_normal.z)); + 0, + __float_as_uint(world_normal.x), + __float_as_uint(world_normal.y), + __float_as_uint(world_normal.z)); } -// ray cylinder intersection with top and bottom caps -// it can also be modeled as cylinder with two disks. +// ray cylinder intersection with top and bottom caps +// it can also be modeled as cylinder with two disks. extern "C" __global__ void __intersection__cylinder_y_capped() { - const OptixCSP::GeometryDataST::Cylinder_Y& cyl = params.geometry_data_array[optixGetPrimitiveIndex()].getCylinder_Y(); + const OptixCSP::GeometryDataST::Cylinder_Y &cyl = params.geometry_data_array[optixGetPrimitiveIndex()].getCylinder_Y(); // Get ray information: origin, direction, and min/max distances over which ray should be tested const float3 ray_orig = optixGetWorldRayOrigin(); const float3 ray_dir = normalize(optixGetWorldRayDirection()); - const float ray_tmin = optixGetRayTmin(); - const float ray_tmax = optixGetRayTmax(); + const float ray_tmin = optixGetRayTmin(); + const float ray_tmax = optixGetRayTmax(); // Transform ray to the cylinder's local coordinate system float3 local_ray_orig = ray_orig - cyl.center; @@ -199,13 +207,11 @@ extern "C" __global__ void __intersection__cylinder_y_capped() local_ray_orig = make_float3( dot(local_ray_orig, local_x), dot(local_ray_orig, local_y), - dot(local_ray_orig, local_z) - ); + dot(local_ray_orig, local_z)); local_ray_dir = make_float3( dot(local_ray_dir, local_x), dot(local_ray_dir, local_y), - dot(local_ray_dir, local_z) - ); + dot(local_ray_dir, local_z)); // Solve quadratic equation for intersection with curved surface float A = local_ray_dir.x * local_ray_dir.x + local_ray_dir.z * local_ray_dir.z; @@ -240,7 +246,7 @@ extern "C" __global__ void __intersection__cylinder_y_capped() { float t = (-cyl.half_height - local_ray_orig.y) / local_ray_dir.y; float2 hit_point = make_float2(local_ray_orig.x + t * local_ray_dir.x, - local_ray_orig.z + t * local_ray_dir.z); + local_ray_orig.z + t * local_ray_dir.z); if (t > ray_tmin && t < ray_tmax && dot(hit_point, hit_point) <= cyl.radius * cyl.radius) { t_caps = t; @@ -252,7 +258,7 @@ extern "C" __global__ void __intersection__cylinder_y_capped() { float t = (cyl.half_height - local_ray_orig.y) / local_ray_dir.y; float2 hit_point = make_float2(local_ray_orig.x + t * local_ray_dir.x, - local_ray_orig.z + t * local_ray_dir.z); + local_ray_orig.z + t * local_ray_dir.z); if (t > ray_tmin && t < ray_tmax && dot(hit_point, hit_point) <= cyl.radius * cyl.radius) { t_caps = fminf(t_caps, t); @@ -294,12 +300,10 @@ extern "C" __global__ void __intersection__cylinder_y_capped() 0, // User-defined instance ID or custom data __float_as_uint(world_normal.x), __float_as_uint(world_normal.y), - __float_as_uint(world_normal.z) - ); + __float_as_uint(world_normal.z)); } - -// For a parabolic surface rectangle aperture where +// For a parabolic surface rectangle aperture where // the base (normal projection) is defined by the center and its two unit edge vectors // In a local coordinate system (with origin at the anchor) the flat rectangle covers: // x in [0, L1] and y in [0, L2], @@ -320,12 +324,12 @@ extern "C" __global__ void __intersection__cylinder_y_capped() // The local hit point is then transformed back to world space for reporting. extern "C" __global__ void __intersection__rectangle_parabolic() { - const OptixCSP::GeometryDataST::Rectangle_Parabolic& rect = params.geometry_data_array[optixGetPrimitiveIndex()].getRectangleParabolic(); + const OptixCSP::GeometryDataST::Rectangle_Parabolic &rect = params.geometry_data_array[optixGetPrimitiveIndex()].getRectangleParabolic(); // Get ray information. const float3 ray_orig = optixGetWorldRayOrigin(); const float3 ray_dir = optixGetWorldRayDirection(); - const float ray_tmin = optixGetRayTmin(); - const float ray_tmax = optixGetRayTmax(); + const float ray_tmin = optixGetRayTmin(); + const float ray_tmax = optixGetRayTmax(); // // Build the local coordinate system. @@ -396,9 +400,11 @@ extern "C" __global__ void __intersection__rectangle_parabolic() } } } - else { + else + { float discr = B * B - 4.0f * A * C; - if (discr >= 0.0f) { + if (discr >= 0.0f) + { float sqrt_discr = sqrtf(discr); t1 = -0.5f * (B + sqrt_discr) / A; t2 = -0.5f * (B - sqrt_discr) / A; @@ -443,12 +449,12 @@ extern "C" __global__ void __intersection__rectangle_parabolic() // N_local = (-f_x, -f_y, 1) = ( -curv_x*x_hit, -curv_y*y_hit, 1 ). // float3 N_local = normalize(make_float3(-curv_x * x_hit, - -curv_y * y_hit, - 1.0f)); + -curv_y * y_hit, + 1.0f)); // Transform the normal back to world coordinates. float3 world_normal = normalize(N_local.x * e1 + - N_local.y * e2 + - N_local.z * n); + N_local.y * e2 + + N_local.z * n); // Compute the hit point in world space. float3 world_hit = ray_orig + t * ray_dir; @@ -457,19 +463,19 @@ extern "C" __global__ void __intersection__rectangle_parabolic() // Here, the two reported extra attributes are the parametric coordinates (a1, a2), // encoded as unsigned integers. optixReportIntersection(t, 0, - __float_as_uint(world_normal.x), - __float_as_uint(world_normal.y), - __float_as_uint(world_normal.z)); + __float_as_uint(world_normal.x), + __float_as_uint(world_normal.y), + __float_as_uint(world_normal.z)); } // intersection algorithm for a flat triangle based on "Fast, Minimum Storage Ray/Triangle Intersection" by M�ller and Trumbore (1997) -// code from here: https://en.wikipedia.org/wiki/M%C3%B6ller%E2%80%93Trumbore_intersection_algorithm +// code from here: https://en.wikipedia.org/wiki/M%C3%B6ller%E2%80%93Trumbore_intersection_algorithm extern "C" __device__ __inline__ float _triangle_intersect( float3 p0, float3 edge1, float3 edge2, float3 ro, float3 rd) { const float3 pvec = cross(rd, edge2); - const float det = dot(edge1, pvec); + const float det = dot(edge1, pvec); // // Backface culling + parallel rejection // // (det must be strictly positive and not tiny) @@ -479,26 +485,28 @@ extern "C" __device__ __inline__ float _triangle_intersect( // Parallel rejection // (det must be not tiny) const float eps = 1e-8f; - if (fabs(det) <= eps) return -1.0f; - + if (fabs(det) <= eps) + return -1.0f; const float inv_det = 1.0f / det; const float3 tvec = ro - p0; - const float u = dot(tvec, pvec) * inv_det; - if (u < 0.0f || u > 1.0f) return -1.0f; + const float u = dot(tvec, pvec) * inv_det; + if (u < 0.0f || u > 1.0f) + return -1.0f; const float3 qvec = cross(tvec, edge1); - const float v = dot(rd, qvec) * inv_det; - if (v < 0.0f || (u + v) > 1.0f) return -1.0f; + const float v = dot(rd, qvec) * inv_det; + if (v < 0.0f || (u + v) > 1.0f) + return -1.0f; - const float t = dot(edge2, qvec) * inv_det; + const float t = dot(edge2, qvec) * inv_det; return t; } // // intersection algorithm for a flat triangle based on "Fast, Minimum Storage Ray/Triangle Intersection" by M�ller and Trumbore (1997) -// // code from here: https://en.wikipedia.org/wiki/M%C3%B6ller%E2%80%93Trumbore_intersection_algorithm +// // code from here: https://en.wikipedia.org/wiki/M%C3%B6ller%E2%80%93Trumbore_intersection_algorithm // extern "C" __global__ void __intersection__triangle_flat() // { // const OptixCSP::GeometryDataST::Triangle_Flat& tri = params.geometry_data_array[optixGetPrimitiveIndex()].getTriangle_Flat(); @@ -511,7 +519,6 @@ extern "C" __device__ __inline__ float _triangle_intersect( // const float3 edge1 = tri.e1; // const float3 edge2 = tri.e2; - // const float3 pvec = cross(rd, edge2); // const float det = dot(edge1, pvec); @@ -528,7 +535,7 @@ extern "C" __device__ __inline__ float _triangle_intersect( // const float3 qvec = cross(tvec, edge1); // const float v = dot(rd, qvec) * inv_det; -// if (v < 0.0f || (u + v) > 1.0f) +// if (v < 0.0f || (u + v) > 1.0f) // return; // const float t = dot(edge2, qvec) * inv_det; @@ -544,31 +551,30 @@ extern "C" __device__ __inline__ float _triangle_intersect( // } // intersection algorithm for a flat triangle based on "Fast, Minimum Storage Ray/Triangle Intersection" by M�ller and Trumbore (1997) -// code from here: https://en.wikipedia.org/wiki/M%C3%B6ller%E2%80%93Trumbore_intersection_algorithm +// code from here: https://en.wikipedia.org/wiki/M%C3%B6ller%E2%80%93Trumbore_intersection_algorithm extern "C" __global__ void __intersection__triangle_flat() { - const OptixCSP::GeometryDataST::Triangle_Flat& tri = params.geometry_data_array[optixGetPrimitiveIndex()].getTriangle_Flat(); + const OptixCSP::GeometryDataST::Triangle_Flat &tri = params.geometry_data_array[optixGetPrimitiveIndex()].getTriangle_Flat(); const float3 ro = optixGetObjectRayOrigin(); const float3 rd = optixGetObjectRayDirection(); const float t = _triangle_intersect(tri.v0, tri.e1, tri.e2, ro, rd); - if (t < optixGetRayTmin() || t > optixGetRayTmax()) return; + if (t < optixGetRayTmin() || t > optixGetRayTmax()) + return; float3 world_normal = tri.normal; optixReportIntersection(t, 0, - __float_as_uint(world_normal.x), - __float_as_uint(world_normal.y), - __float_as_uint(world_normal.z)); - + __float_as_uint(world_normal.x), + __float_as_uint(world_normal.y), + __float_as_uint(world_normal.z)); } - extern "C" __global__ void __intersection__quadrilateral_flat() { - const OptixCSP::GeometryDataST::Quadrilateral_Flat& quad = params.geometry_data_array[optixGetPrimitiveIndex()].getQuadrilateral_Flat(); + const OptixCSP::GeometryDataST::Quadrilateral_Flat &quad = params.geometry_data_array[optixGetPrimitiveIndex()].getQuadrilateral_Flat(); const float3 ro = optixGetObjectRayOrigin(); const float3 rd = optixGetObjectRayDirection(); @@ -587,12 +593,138 @@ extern "C" __global__ void __intersection__quadrilateral_flat() t = _triangle_intersect(p2, e1, e2, ro, rd); } - if (t < optixGetRayTmin() || t > optixGetRayTmax()) return; + if (t < optixGetRayTmin() || t > optixGetRayTmax()) + return; float3 world_normal = quad.normal; optixReportIntersection(t, 0, - __float_as_uint(world_normal.x), - __float_as_uint(world_normal.y), - __float_as_uint(world_normal.z)); + __float_as_uint(world_normal.x), + __float_as_uint(world_normal.y), + __float_as_uint(world_normal.z)); +} + +extern "C" __global__ void __intersection__circle_flat() +{ + const OptixCSP::GeometryDataST::Circle_Flat &circ = params.geometry_data_array[optixGetPrimitiveIndex()].getCircle_Flat(); + + // Get ray information: origin, direction, and min/max distances over which ray should be tested + const float3 ray_orig = optixGetWorldRayOrigin(); + const float3 ray_dir = optixGetWorldRayDirection(); + const float ray_tmin = optixGetRayTmin(), ray_tmax = optixGetRayTmax(); + + // // Compute ray intersection point + // float3 n = make_float3( circ.plane ); + // float dt = dot( ray_dir, n ); + // // Compute distance t (point of intersection) along ray direction from ray origin + // float t = ( circ.plane.w - dot( n, ray_orig ) ) / dt; + float t = ray_distance_to_plane(ray_orig, ray_dir, circ.plane); + const float4 n = circ.plane; + + // Verify intersection distance and Report ray intersection point + if (t > ray_tmin && t < ray_tmax) + { + float3 p = ray_orig + ray_dir * t; + float d = length(p - circ.center); + if (d <= circ.r) + { + optixReportIntersection(t, + 0, + __float_as_uint(n.x), + __float_as_uint(n.y), + __float_as_uint(n.z)); + } + } +} + +extern "C" __global__ void __intersection__hexagon_flat() +{ + const OptixCSP::GeometryDataST::Hexagon_Flat &hex = params.geometry_data_array[optixGetPrimitiveIndex()].getHexagon_Flat(); + + // Get ray information: origin, direction, and min/max distances over which ray should be tested + const float3 ray_orig = optixGetWorldRayOrigin(); + const float3 ray_dir = optixGetWorldRayDirection(); + const float ray_tmin = optixGetRayTmin(), ray_tmax = optixGetRayTmax(); + + float t = ray_distance_to_plane(ray_orig, ray_dir, hex.plane); + const float4 n = hex.plane; + + // Verify intersection distance and Report ray intersection point + if (t > ray_tmin && t < ray_tmax) + { + // TODO: Need to adjust for possible rotation... + bool is_in = false; + float3 p = ray_orig + ray_dir * t - hex.center; + // float d = length(p - circ.center); + float s = hex.s; + float xl = 0.5f * s; + float yl = 0.5f * sqrtf(3.0f) * s; + if (-xl <= p.x && p.x <= xl && -yl <= p.y && p.y <= yl) + { + // Center + is_in = true; + } + else if (-s <= p.x && p.x < xl) + { + // Left side + float y1 = sqrtf(3.0f) * (p.x + s); + float y2 = -y1; + if (y2 <= p.y && p.y <= y1) + { + is_in = true; + } + } + else if (xl < p.x && p.x <= s) + { + // Right side + float y1 = sqrtf(3.0f) * (p.x - s); + float y2 = -y1; + if (y1 <= p.y && p.y <= y2) + { + is_in = true; + } + } + + if (is_in) + { + optixReportIntersection(t, + 0, + __float_as_uint(n.x), + __float_as_uint(n.y), + __float_as_uint(n.z)); + } + } +} + +extern "C" __global__ void __intersection__annulus_flat() +{ + const OptixCSP::GeometryDataST::Annulus_Flat &anf = params.geometry_data_array[optixGetPrimitiveIndex()].getAnnulus_Flat(); + + // Get ray information: origin, direction, and min/max distances over which ray should be tested + const float3 ray_orig = optixGetWorldRayOrigin(); + const float3 ray_dir = optixGetWorldRayDirection(); + const float ray_tmin = optixGetRayTmin(), ray_tmax = optixGetRayTmax(); + + float t = ray_distance_to_plane(ray_orig, ray_dir, anf.plane); + const float4 n = anf.plane; + + // Verify intersection distance and Report ray intersection point + if (t > ray_tmin && t < ray_tmax) + { + // TODO: Need to adjust for possible rotation... + float3 p = ray_orig + ray_dir * t - anf.center; + float d = length(p); + if (anf.ri <= d && d <= anf.ro) + { + float theta = atan2f(p.y, p.x); + if (fabsf(theta) <= 0.5f * anf.arc) + { + optixReportIntersection(t, + 0, + __float_as_uint(n.x), + __float_as_uint(n.y), + __float_as_uint(n.z)); + } + } + } } diff --git a/coretrace/simulation_runner/optix_runner/OptixCSP/src/shaders/materials.cu b/coretrace/simulation_runner/optix_runner/OptixCSP/src/shaders/materials.cu index 0c896c55..004dbf73 100644 --- a/coretrace/simulation_runner/optix_runner/OptixCSP/src/shaders/materials.cu +++ b/coretrace/simulation_runner/optix_runner/OptixCSP/src/shaders/materials.cu @@ -125,7 +125,7 @@ extern "C" __global__ void __closesthit__element() const float3 hit_point = ray_orig + ray_t * ray_dir; OptixCSP::PerRayData prd = OptixCSP::getPayload(); - const int new_depth = prd.depth + 1; // Increment the ray depth for recursive tracing + const unsigned int new_depth = prd.depth + 1; // Increment the ray depth for recursive tracing // we have two scenarios here // if we use refraction, then we look at transmissivity to determine if the ray will refract @@ -241,17 +241,17 @@ extern "C" __global__ void __closesthit__element() { // Get buffer slot - const int slot = params.max_depth * prd.ray_path_index + new_depth; + const unsigned int slot = params.max_depth * prd.ray_path_index + new_depth; // Store the hit point in the hit point buffer (used for visualization or further calculations) - params.hit_point_buffer[slot] = make_float4(new_depth, hit_point); + params.hit_buffer[slot].hit_point = make_float4(new_depth, hit_point); // Store element id const int32_t elementId = params.geometry_data_array[optixGetPrimitiveIndex()].id; - params.element_id_buffer[slot] = elementId; + params.hit_buffer[slot].element_id = elementId; // Store hit type - params.hit_type_buffer[slot] = hit_type; + params.hit_buffer[slot].hit_type = hit_type; // Store the reflected direction in its buffer (used for visualization or further calculations) /* @@ -285,6 +285,11 @@ extern "C" __global__ void __closesthit__element() prd.depth = params.max_depth; // terminate the ray by setting depth to max depth } } + else if (!absorbed) + { + // Ray hit an element but max depth was reached; count it (absorption at this depth does not count). + atomicAdd(reinterpret_cast(params.d_depth_exceeded_count), 1ULL); + } setPayload(prd); } diff --git a/coretrace/simulation_runner/optix_runner/OptixCSP/src/shaders/sun.cu b/coretrace/simulation_runner/optix_runner/OptixCSP/src/shaders/sun.cu index 1d5cf084..fd83a085 100644 --- a/coretrace/simulation_runner/optix_runner/OptixCSP/src/shaders/sun.cu +++ b/coretrace/simulation_runner/optix_runner/OptixCSP/src/shaders/sun.cu @@ -2,27 +2,29 @@ #include #include -// todo: move curand initializatin to global function - -//#include -//#include +// #include +// #include #include "Soltrace.h" #include "soltrace_constants.h" #include // Launch parameters for soltrace -extern "C" { +extern "C" +{ __constant__ OptixCSP::LaunchParams params; } -namespace OptixCSP { +namespace OptixCSP +{ // Halton sequence generator, used for quasi-random sampling // Generates a Halton sequence value for a given index and base - __device__ float halton(int index, int base) { + __device__ float halton(unsigned int index, unsigned int base) + { float f = 1.0f, result = 0.0f; - while (index > 0) { + while (index > 0) + { f = f / base; result = result + f * (index % base); index = index / base; @@ -32,7 +34,8 @@ namespace OptixCSP { // Generate a sample point within a parallelogram defined by the AABB (Axis-Aligned Bounding Box) // Uses the Halton sequence for sampling - __device__ float3 haltonSampleInParallelogram(unsigned int sample_index) { + __device__ float3 haltonSampleInParallelogram(unsigned int sample_index) + { // Generate Halton sequence values float u = halton(sample_index, 2); // Base 2 for x float v = halton(sample_index, 3); // Base 3 for y @@ -60,7 +63,8 @@ namespace OptixCSP { } // Sample a random ray direction within a cone defined by a maximum angle - __device__ float3 sampleRayDirectionInCone_Pillbox(float3 dir, float half_angle, unsigned int ray_number) { + __device__ float3 sampleRayDirectionInCone_Pillbox(float3 dir, float half_angle, unsigned int ray_number) + { curandState rng_state = params.rng_states[ray_number]; const float half_angle_mrad = half_angle; @@ -91,10 +95,11 @@ namespace OptixCSP { return normalize(sin_t * (cosf(phi) * u + sinf(phi) * v) + cos_t * w); } - __device__ float3 sampleRayDirectionInCone_Gaussian(float3 dir, float sigma, unsigned int ray_number) { + __device__ float3 sampleRayDirectionInCone_Gaussian(float3 dir, float sigma, unsigned int ray_number) + { curandState rng = params.rng_states[ray_number]; - const float sigma_rad = sigma * 0.001f; // Convert to rad + const float sigma_rad = sigma * 0.001f; // Convert to rad // Build an orthonormal basis float3 w = normalize(dir); @@ -119,7 +124,7 @@ namespace OptixCSP { { curandState rng = params.rng_states[ray_number]; - const float max_angle_mrad = params.sun_max_angle; // [mrad] + const float max_angle_mrad = params.sun_max_angle; // [mrad] // Orthonormal basis about dir float3 w = normalize(dir); @@ -141,7 +146,7 @@ namespace OptixCSP { theta2 = thetax * thetax + thetay * thetay; theta = sqrtf(theta2); - if (theta <= 4.65f) // within solar disc (mrad, as in CPU code) + if (theta <= 4.65f) // within solar disc (mrad, as in CPU code) { // stest = cos(0.326 * theta) / cos(0.308 * theta); float t = theta; @@ -168,8 +173,7 @@ namespace OptixCSP { float3 local_dir = make_float3( sin_t * cosf(phi), sin_t * sinf(phi), - cos_t - ); + cos_t); params.rng_states[ray_number] = rng; @@ -216,8 +220,7 @@ namespace OptixCSP { float3 local_dir = make_float3( sin_t * cosf(phi), sin_t * sinf(phi), - cos_t - ); + cos_t); params.rng_states[ray_number] = rng; @@ -226,8 +229,8 @@ namespace OptixCSP { return world_dir; } - __device__ float3 sampleRayDirectionInCone_UserDefined(float3 dir, int user_capacity, float* user_angle, - float* user_intensity, unsigned int ray_number) + __device__ float3 sampleRayDirectionInCone_UserDefined(float3 dir, int user_capacity, float *user_angle, + float *user_intensity, unsigned int ray_number) { curandState rng = params.rng_states[ray_number]; @@ -236,7 +239,7 @@ namespace OptixCSP { return normalize(dir); } - const float max_angle_mrad = params.sun_max_angle; // [mrad] + const float max_angle_mrad = params.sun_max_angle; // [mrad] const float max_int = params.sun_max_intensity; // Orthonormal basis about dir @@ -268,8 +271,7 @@ namespace OptixCSP { stest = user_intensity[i]; else { - stest = user_intensity[i - 1] + (user_intensity[i] - user_intensity[i - 1]) * (theta - user_angle[i - 1]) - / denom; + stest = user_intensity[i - 1] + (user_intensity[i] - user_intensity[i - 1]) * (theta - user_angle[i - 1]) / denom; } } @@ -287,8 +289,7 @@ namespace OptixCSP { float3 local_dir = make_float3( sin_t * cosf(phi), sin_t * sinf(phi), - cos_t - ); + cos_t); params.rng_states[ray_number] = rng; @@ -302,22 +303,22 @@ namespace OptixCSP { extern "C" __global__ void __raygen__sun_source() { // Lookup location in launch grid - const uint3 launch_idx = optixGetLaunchIndex(); // Index of the current launch thread - const uint3 launch_dims = optixGetLaunchDimensions(); // Dimensions of the launch grid - const unsigned int ray_number = launch_idx.y * launch_dims.x + launch_idx.x; // Unique ray ID - const unsigned int ray_number_global = ray_number + params.ray_offset; // Global unique ray ID + const uint3 launch_idx = optixGetLaunchIndex(); // Index of the current launch thread + const uint3 launch_dims = optixGetLaunchDimensions(); // Dimensions of the launch grid + const unsigned int ray_number = launch_idx.y * launch_dims.x + launch_idx.x; // Unique ray ID + const unsigned long long ray_number_global = ray_number + params.ray_offset; // Global unique ray ID float3 sun_sample_pos; switch (params.sun_gen_type) { - case(OptixCSP::GenType::RANDOM): - sun_sample_pos = OptixCSP::randomSampleInParallelogram(ray_number); - break; - case(OptixCSP::GenType::HALTON): - sun_sample_pos = OptixCSP::haltonSampleInParallelogram(ray_number_global); - break; - default: - return; + case (OptixCSP::GenType::RANDOM): + sun_sample_pos = OptixCSP::randomSampleInParallelogram(ray_number); + break; + case (OptixCSP::GenType::HALTON): + sun_sample_pos = OptixCSP::haltonSampleInParallelogram(ray_number_global); + break; + default: + return; } // Sample emission angle here - capturing sun distribution @@ -331,47 +332,44 @@ extern "C" __global__ void __raygen__sun_source() { switch (params.sun_shape) { - case(OptixCSP::SunShape::PILLBOX): - ray_dir = OptixCSP::sampleRayDirectionInCone_Pillbox(init_ray_dir, params.half_width, ray_number); - break; - case(OptixCSP::SunShape::GAUSSIAN): - ray_dir = OptixCSP::sampleRayDirectionInCone_Gaussian(init_ray_dir, params.sigma, ray_number); - break; - case(OptixCSP::SunShape::BUIE_CSR): - ray_dir = OptixCSP::sampleRayDirectionInCone_BuieCSR(init_ray_dir, params.buie_kappa, params.buie_gamma, ray_number); - break; - case(OptixCSP::SunShape::LIMBDARKENED): - ray_dir = OptixCSP::sampleRayDirectionInCone_LimbDarkened(init_ray_dir, ray_number); - break; - case(OptixCSP::SunShape::USER_DEFINED): - ray_dir = OptixCSP::sampleRayDirectionInCone_UserDefined(init_ray_dir, params.sun_user_capacity, - params.sun_user_angle, params.sun_user_intensity, ray_number); - break; - default: - assert(false); - // Just return since the sun shape is not supported - return; + case (OptixCSP::SunShape::PILLBOX): + ray_dir = OptixCSP::sampleRayDirectionInCone_Pillbox(init_ray_dir, params.half_width, ray_number); + break; + case (OptixCSP::SunShape::GAUSSIAN): + ray_dir = OptixCSP::sampleRayDirectionInCone_Gaussian(init_ray_dir, params.sigma, ray_number); + break; + case (OptixCSP::SunShape::BUIE_CSR): + ray_dir = OptixCSP::sampleRayDirectionInCone_BuieCSR(init_ray_dir, params.buie_kappa, params.buie_gamma, ray_number); + break; + case (OptixCSP::SunShape::LIMBDARKENED): + ray_dir = OptixCSP::sampleRayDirectionInCone_LimbDarkened(init_ray_dir, ray_number); + break; + case (OptixCSP::SunShape::USER_DEFINED): + ray_dir = OptixCSP::sampleRayDirectionInCone_UserDefined(init_ray_dir, params.sun_user_capacity, + params.sun_user_angle, params.sun_user_intensity, ray_number); + break; + default: + assert(false); + // Just return since the sun shape is not supported + return; } } else { ray_dir = init_ray_dir; } - - - //float3 ray_dir = OptixCSP::sampleRayDirectionInCone_Gaussian(init_ray_dir, params.max_sun_angle, ray_number); + + // float3 ray_dir = OptixCSP::sampleRayDirectionInCone_Gaussian(init_ray_dir, params.max_sun_angle, ray_number); // Create the PerRayData structure to track ray state (e.g., path index and recursion depth) OptixCSP::PerRayData prd; prd.ray_path_index = ray_number; prd.depth = 0; - // TODO make this a launch parameter - params.hit_point_buffer[params.max_depth * prd.ray_path_index] = make_float4(0.0f, ray_gen_pos); - params.element_id_buffer[params.max_depth * prd.ray_path_index] = OptixCSP::kElementIdRayGen; - params.hit_type_buffer[params.max_depth * prd.ray_path_index] = OptixCSP::HitType::HIT_CREATE; + params.hit_buffer[params.max_depth * prd.ray_path_index].hit_point = make_float4(0.0f, ray_gen_pos); + params.hit_buffer[params.max_depth * prd.ray_path_index].element_id = OptixCSP::kElementIdRayGen; + params.hit_buffer[params.max_depth * prd.ray_path_index].hit_type = OptixCSP::HitType::HIT_CREATE; params.sun_dir_buffer[prd.ray_path_index] = ray_dir; - // Cast and trace the ray through the scene optixTrace( @@ -386,7 +384,6 @@ extern "C" __global__ void __raygen__sun_source() OptixCSP::RAY_TYPE_RADIANCE, // Ray type (radiance for sunlight) OptixCSP::RAY_TYPE_COUNT, // Number of ray types OptixCSP::RAY_TYPE_RADIANCE, // SBT offset (ray type to launch) - reinterpret_cast(prd.ray_path_index), - reinterpret_cast(prd.depth) - ); -} \ No newline at end of file + reinterpret_cast(prd.ray_path_index), + reinterpret_cast(prd.depth)); +} diff --git a/coretrace/simulation_runner/optix_runner/OptixCSP/src/utils/util_check.hpp b/coretrace/simulation_runner/optix_runner/OptixCSP/src/utils/util_check.hpp index c9247570..e8927199 100644 --- a/coretrace/simulation_runner/optix_runner/OptixCSP/src/utils/util_check.hpp +++ b/coretrace/simulation_runner/optix_runner/OptixCSP/src/utils/util_check.hpp @@ -83,7 +83,7 @@ namespace OptixCSP { << std::string(log, log + log_size); throw std::runtime_error(oss.str()); } - else if (log_size > 1) + else if (log_size > 1 && log != nullptr && log[0] != '\0') { std::cerr << "OptiX log for " << func << ":\n" << std::string(log, log + log_size) << std::endl; diff --git a/coretrace/simulation_runner/optix_runner/optix_runner.cpp b/coretrace/simulation_runner/optix_runner/optix_runner.cpp index be7f6979..532638a6 100644 --- a/coretrace/simulation_runner/optix_runner/optix_runner.cpp +++ b/coretrace/simulation_runner/optix_runner/optix_runner.cpp @@ -1,8 +1,7 @@ #include "simulation_runner/optix_runner/optix_runner.hpp" -#include "simulation_data/simulation_parameters.hpp" -#include "simulation_data/simulation_data.hpp" #include "simulation_data/simulation_data_export.hpp" +#include #include #include @@ -20,6 +19,46 @@ OptixRunner::~OptixRunner() this->m_sys.clean_up(); } +void OptixRunner::set_verbose(bool verbose) +{ + m_sys.set_verbose(verbose); +} + +void OptixRunner::print_timing() const +{ + m_sys.print_timing(); +} + +void OptixRunner::set_max_ray_depth(uint_fast64_t depth) +{ + m_sys.set_max_ray_depth(depth); +} + +void OptixRunner::set_batch_size(uint_fast64_t batch_size) +{ + m_sys.set_batch_size(batch_size); +} + +uint_fast64_t OptixRunner::get_batch_size() const +{ + return m_sys.get_batch_size(); +} + +void OptixRunner::set_trim_excess_rays(bool trim) +{ + m_sys.set_trim_excess_rays(trim); +} + +bool OptixRunner::get_trim_excess_rays() const +{ + return m_sys.get_trim_excess_rays(); +} + +uint64_t OptixRunner::get_N_run_iterations() const +{ + return m_sys.get_N_run_iterations(); +} + RunnerStatus OptixRunner::initialize() { // add elements to sys using data structure from SimulationData @@ -51,8 +90,6 @@ RunnerStatus OptixRunner::setup_simulation(const SimulationData *data) m_sys.initialize(); - - // std::cout << "Number of stages: " << this->tsys.StageList.size() // << std::endl; @@ -63,6 +100,7 @@ RunnerStatus OptixRunner::setup_parameters(const SimulationData *data) { // Get Parameter data const SimulationParameters &sim_params = data->get_simulation_parameters(); + m_sys.set_number_of_rays(sim_params.number_of_rays, sim_params.max_number_of_rays); m_sys.set_seed(static_cast(sim_params.seed)); @@ -106,6 +144,17 @@ RunnerStatus OptixRunner::setup_sun(const SimulationData *data) } } + // Warn if Halton sampling is used with more rays than uint32_t can index, + // since the Halton sequence index is truncated to 32 bits causing repeated positions. + if (sun->get_gen_type() == SolTrace::Data::GenType::HALTON && + data->get_simulation_parameters().max_number_of_rays > static_cast(std::numeric_limits::max())) + { + std::cerr << "Warning: max_number_of_rays exceeds 32-bit unsigned int maximum (" + << std::numeric_limits::max() + << ") with Halton ray generation. Halton sequence positions will repeat after index " + << std::numeric_limits::max() << "." << std::endl; + } + return RunnerStatus::SUCCESS; } @@ -124,8 +173,10 @@ RunnerStatus OptixRunner::setup_elements(const SimulationData *data) auto optix_el = std::make_shared(); auto origin = el->get_origin_global(); - OptixCSP::Vec3d origin_vec(origin.x, origin.y, origin.z); + auto ap = el->get_aim_vector_global(); + // OptixCSP::Vec3d origin_vec(origin.x, origin.y, origin.z); optix_el->set_origin(ToVec3d(origin)); + optix_el->set_aim_point(ToVec3d(ap)); optix_el->set_rotation_matrix(ToMatrix33d(el->get_local_to_global())); // Safely narrow element id to int32_t @@ -136,8 +187,6 @@ RunnerStatus OptixRunner::setup_elements(const SimulationData *data) } optix_el->set_id(static_cast(id)); - // TODO: check zrot, radiance or degree here? - // Add optical properties OpticalProperties *opt_front = el->get_front_optical_properties(); OptixCSP::OpticalDistribution od = this->to_optical_distribution(opt_front->error_distribution_type); @@ -214,7 +263,6 @@ RunnerStatus OptixRunner::setup_elements(const SimulationData *data) } auto surface = std::make_shared(); - // surface->set_half_height(2.); // TODO this needs to come from the aperture surface->set_half_height(0.5 * el_aperture->y_length()); surface->set_radius(el_surface->radius); optix_el->set_surface(surface); @@ -228,22 +276,39 @@ RunnerStatus OptixRunner::setup_elements(const SimulationData *data) } auto soltrace_aperture_type = el->get_aperture()->get_type(); - + switch (soltrace_aperture_type) { - case ApertureType::RECTANGLE: { - auto el_aperture = std::dynamic_pointer_cast(el->get_aperture()); assert(el_aperture != nullptr); // TODO: account for x and y coord? - auto aperture = std::make_shared(el_aperture->x_length(), el_aperture->y_length(), - el_aperture->x_coord(), el_aperture->y_coord()); + // auto aperture = std::make_shared(el_aperture->x_length(), + // el_aperture->y_length()); + auto aperture = std::make_shared(el_aperture->x_length(), + el_aperture->y_length(), + el_aperture->x_coord(), + el_aperture->y_coord()); + optix_el->set_aperture(aperture); + break; + } + case ApertureType::ANNULUS: + { + auto el_aperture = std::dynamic_pointer_cast(el->get_aperture()); + assert(el_aperture != nullptr); + auto aperture = std::make_shared(el_aperture->inner_radius, el_aperture->outer_radius, el_aperture->arc_angle * D2R); + optix_el->set_aperture(aperture); + break; + } + case ApertureType::CIRCLE: + { + auto el_aperture = std::dynamic_pointer_cast(el->get_aperture()); + assert(el_aperture != nullptr); + auto aperture = std::make_shared(0.5 * el_aperture->diameter); optix_el->set_aperture(aperture); break; } - case ApertureType::EQUILATERAL_TRIANGLE: { auto el_aperture = std::dynamic_pointer_cast(el->get_aperture()); @@ -259,7 +324,6 @@ RunnerStatus OptixRunner::setup_elements(const SimulationData *data) break; } - case ApertureType::IRREGULAR_TRIANGLE: { auto el_aperture = std::dynamic_pointer_cast(el->get_aperture()); @@ -274,7 +338,6 @@ RunnerStatus OptixRunner::setup_elements(const SimulationData *data) break; } - case ApertureType::IRREGULAR_QUADRILATERAL: { auto el_aperture = std::dynamic_pointer_cast(el->get_aperture()); @@ -290,11 +353,18 @@ RunnerStatus OptixRunner::setup_elements(const SimulationData *data) break; } - + case ApertureType::HEXAGON: + { + auto el_aperture = std::dynamic_pointer_cast(el->get_aperture()); + assert(el_aperture != nullptr); + auto aperture = std::make_shared(el_aperture->radius_circumscribed_circle()); + optix_el->set_aperture(aperture); + break; + } default: // std::cerr << "Unsupported aperture type in OptixCSP" << std::endl; - throw std::runtime_error("Unsupported aperture type in OptixRunner"); - break; + throw std::runtime_error("Unsupported aperture type in OptixRunner"); + break; } double xmin, xmax, ymin, ymax, zmin, zmax; @@ -323,9 +393,8 @@ RunnerStatus OptixRunner::setup_elements(const SimulationData *data) RunnerStatus OptixRunner::update_simulation(const SimulationData *data) { - this->setup_simulation(data); - // TODO: Implement this - return RunnerStatus::SUCCESS; + return this->setup_simulation(data); + // TODO: Implement this in a less lazy manner... } RunnerStatus OptixRunner::run_simulation() @@ -374,6 +443,8 @@ RunnerStatus OptixRunner::report_simulation(SimulationResult *result, std::map ray_records; std::map::iterator iter; + // TODO: This should be redone without using these vectors and just using the + // internal hit record vector // Get results from optixcsp std::vector hp_vec; std::vector raynumber_vec; @@ -390,7 +461,7 @@ RunnerStatus OptixRunner::report_simulation(SimulationResult *result, // Loop through data, populating ray records // Assumes ray data is grouped serially size_t ndata = hp_vec.size(); - uint_fast64_t raynum_prev = -1; + // uint_fast64_t raynum_prev = -1; uint_fast64_t raynum = 0; SolTrace::Result::ray_record_ptr rec = nullptr; SolTrace::Result::interaction_ptr intr = nullptr; @@ -399,7 +470,7 @@ RunnerStatus OptixRunner::report_simulation(SimulationResult *result, // Collect results for record raynum = raynumber_vec[ii]; glm::dvec3 pos(hp_vec[ii].y, hp_vec[ii].z, hp_vec[ii].w); // x is depth - glm::dvec3 cos(0.0); // TODO: calculate directions + glm::dvec3 cos(0.0); // TODO: calculate directions int32_t element_id = element_id_vec[ii]; uint8_t hit_type = hit_type_vec[ii]; SolTrace::Result::RayEvent rev = hit_type_to_ray_event(static_cast(hit_type)); @@ -446,7 +517,7 @@ OptixCSP::Vec3d OptixRunner::ToVec3d(glm::dvec3 v) return vec; } -OptixCSP::Matrix33d OptixRunner::ToMatrix33d(const glm::dmat3& mat) +OptixCSP::Matrix33d OptixRunner::ToMatrix33d(const glm::dmat3 &mat) { return OptixCSP::Matrix33d( mat[0][0], mat[1][0], mat[2][0], diff --git a/coretrace/simulation_runner/optix_runner/optix_runner.hpp b/coretrace/simulation_runner/optix_runner/optix_runner.hpp index 7f4054dc..98c87c60 100644 --- a/coretrace/simulation_runner/optix_runner/optix_runner.hpp +++ b/coretrace/simulation_runner/optix_runner/optix_runner.hpp @@ -30,9 +30,38 @@ class OptixRunner : public SolTrace::Runner::SimulationRunner SolTrace::Runner::RunnerStatus get_hp_output(std::vector& hp_vec, std::vector& raynumber_vec, std::vector& element_id_vec); - double get_sun_plane_area() { return m_sys.get_sun_plane_area(); } + double get_sun_plane_area() const { return m_sys.get_sun_plane_area(); } - uint_fast64_t get_N_sun_rays() { return m_sys.get_N_sun_rays(); } + uint_fast64_t get_N_sun_rays() const { return m_sys.get_N_sun_rays(); } + inline uint_fast64_t get_number_rays_launched() const override {return get_N_sun_rays(); } + inline uint_fast64_t get_number_rays_traced() const override {return m_sys.get_N_hit_rays(); } + + uint64_t get_N_run_iterations() const; + + void print_timing() const; + + void set_verbose(bool verbose); + + // Set the number of rays to launch for a trace in each optixLaunch call. + // WARNING: The runner is forced to use this batch size regardless of available GPU memory!!!! + // Setting a large batch size can cause device out of memory errors or degraded GPU performance. + // Setting a small batch size can cause long run times. Care is required when using this function. + void set_batch_size(uint_fast64_t batch_size); + uint_fast64_t get_batch_size() const; + + /// Set the maximum ray interaction depth. Must be called before initialize(). + /// Depth is clamped to [2, 255] with a warning if either bound is exceeded. Defaults to DEFAULT_MAX_TRACE_DEPTH. + void set_max_ray_depth(uint_fast64_t depth); + uint8_t get_max_ray_depth() const { return m_sys.get_max_ray_depth(); } + + /// Returns the number of rays terminated by reaching max_depth without being absorbed. + /// Valid after run_simulation() completes; resets to 0 at the start of each run. + uint_fast64_t get_N_depth_exceeded_rays() const { return m_sys.get_N_depth_exceeded_rays(); } + + /// Enable or disable trimming of excess rays at the end of run() so that + /// exactly the requested number of hit rays is returned. Enabled by default. + void set_trim_excess_rays(bool trim); + bool get_trim_excess_rays() const; // Runner options // void disable_sun_shape_errors() { this->include_sun_shape_errors = false; } diff --git a/coretrace/simulation_runner/simulation_runner.hpp b/coretrace/simulation_runner/simulation_runner.hpp index 91c2c7e2..e8e21ba7 100644 --- a/coretrace/simulation_runner/simulation_runner.hpp +++ b/coretrace/simulation_runner/simulation_runner.hpp @@ -64,6 +64,9 @@ namespace SolTrace::Runner virtual RunnerStatus report_simulation(SolTrace::Result::SimulationResult *result, int level_spec) = 0; + virtual uint_fast64_t get_number_rays_launched() const = 0; + virtual uint_fast64_t get_number_rays_traced() const = 0; + private: }; diff --git a/google-tests/unit-tests/simulation_data/aperture_test.cpp b/google-tests/unit-tests/simulation_data/aperture_test.cpp index 3ee7544e..ddf0f642 100644 --- a/google-tests/unit-tests/simulation_data/aperture_test.cpp +++ b/google-tests/unit-tests/simulation_data/aperture_test.cpp @@ -334,7 +334,7 @@ TEST(Aperture, Hexagon) const double TOL = 1e-12; const double D = 2.0; const double R = 0.5 * D; - const double S = sqrt(3.0) * R; // Side length of hexagon + // const double S = sqrt(3.0) * R; // Side length of hexagon const double AREA = 0.5 * sqrt(27.0) * R * R; const double X1 = 1.0; diff --git a/google-tests/unit-tests/simulation_runner/embree_runner/embree_runner_multithreading_test.cpp b/google-tests/unit-tests/simulation_runner/embree_runner/embree_runner_multithreading_test.cpp index b735fae7..c70054cf 100644 --- a/google-tests/unit-tests/simulation_runner/embree_runner/embree_runner_multithreading_test.cpp +++ b/google-tests/unit-tests/simulation_runner/embree_runner/embree_runner_multithreading_test.cpp @@ -73,7 +73,7 @@ TEST(EmbreeRunner, CancelMultithread) auto fsts = std::async(&EmbreeRunner::run_simulation, &runner); // Give time to start processing - std::this_thread::sleep_for(std::chrono::milliseconds(250)); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); sts = runner.status_simulation(); EXPECT_EQ(sts, RunnerStatus::RUNNING); diff --git a/google-tests/unit-tests/simulation_runner/native_runner/native_runner_test.cpp b/google-tests/unit-tests/simulation_runner/native_runner/native_runner_test.cpp index 9dda077d..c23f0e9b 100644 --- a/google-tests/unit-tests/simulation_runner/native_runner/native_runner_test.cpp +++ b/google-tests/unit-tests/simulation_runner/native_runner/native_runner_test.cpp @@ -236,6 +236,59 @@ TEST(NativeRunner, SmokeTest) << std::endl; } +TEST(NativeRunner, RaysLaunchedEqualsRequestedAfterRun) +{ + const uint_fast64_t NRAYS = 10; + NativeRunner runner; + SimulationData my_sim; + + SimulationParameters ¶ms = my_sim.get_simulation_parameters(); + params.include_optical_errors = false; + params.include_sun_shape_errors = false; + params.number_of_rays = NRAYS; + params.max_number_of_rays = 10 * NRAYS; + + auto sun = SolTrace::Data::make_ray_source(); + sun->set_position(0.0, 0.0, 100.0); + sun->set_shape(SolTrace::Data::SunShape::GAUSSIAN, 1.0, -5.0, 0.0); + my_sim.add_ray_source(sun); + + auto my_st = SolTrace::Data::make_stage(0); + const int NUM_ELEMENTS = 4; + double x[NUM_ELEMENTS] = {1.0, 0.0, -1.0, 0.0}; + double y[NUM_ELEMENTS] = {0.0, 1.0, 0.0, -1.0}; + OpticalProperties optics(SolTrace::Data::InteractionType::REFLECTION, + SolTrace::Data::DistributionType::GAUSSIAN, + 0.0, 1.0, 0.0, 0.0, 1.0, 1.0); + for (int k = 0; k < NUM_ELEMENTS; ++k) + { + element_ptr el = SolTrace::Data::make_element(); + el->set_aperture(SolTrace::Data::make_aperture(2.0)); + el->set_surface(SolTrace::Data::make_surface()); + el->set_reference_frame_geometry(glm::dvec3(x[k], y[k], 0.0), + glm::dvec3(-x[k], -y[k], 1.0), + 0.0); + el->set_front_optical_properties(optics); + el->set_back_optical_properties(optics); + my_st->add_element(el); + } + my_sim.add_stage(my_st); + + RunnerStatus sts = runner.initialize(); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + sts = runner.setup_simulation(&my_sim); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + + // Before running, SunRayCount has not been accumulated yet + EXPECT_EQ(runner.get_number_rays_launched(), static_cast(0)); + + sts = runner.run_simulation(); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + + EXPECT_GE(runner.get_number_rays_launched(), NRAYS); + EXPECT_EQ(runner.get_number_rays_traced(), NRAYS); +} + TEST(NativeRunner, PowerTowerSmokeTest) { SimulationData sd; diff --git a/google-tests/unit-tests/simulation_runner/optix_runner/CMakeLists.txt b/google-tests/unit-tests/simulation_runner/optix_runner/CMakeLists.txt index 3103ba0e..9df394d1 100644 --- a/google-tests/unit-tests/simulation_runner/optix_runner/CMakeLists.txt +++ b/google-tests/unit-tests/simulation_runner/optix_runner/CMakeLists.txt @@ -20,6 +20,9 @@ set(OPTIX_RUNNER_TEST_SRC flat_optical_test.cpp two_plate_test.cpp sun_test.cpp + ray_position_sampling_test.cpp + batch_size_test.cpp + max_ray_depth_test.cpp ) add_executable(OptixRunnerUnitTests diff --git a/google-tests/unit-tests/simulation_runner/optix_runner/batch_size_test.cpp b/google-tests/unit-tests/simulation_runner/optix_runner/batch_size_test.cpp new file mode 100644 index 00000000..7bd5d058 --- /dev/null +++ b/google-tests/unit-tests/simulation_runner/optix_runner/batch_size_test.cpp @@ -0,0 +1,185 @@ +#include + +#include +#include + +#include +#include +#include +#include + +using SolTrace::Runner::RunnerStatus; + +// Reuse the two-plate scene defined in two_plate_test.cpp +void make_two_plate_sd(SimulationData &sd, element_ptr &plate1, element_ptr &plate2); + +// --------------------------------------------------------------------------- +// set_batch_size / get_batch_size accessor tests (no GPU required) +// --------------------------------------------------------------------------- + +// Default value of 0 means automatic batch sizing: determine_batch_size() will +// call automatic_batch_size() to compute a GPU-memory-safe batch size at run +// time. It does NOT mean "launch all rays in a single batch". +TEST(OptixRunnerBatchSize, DefaultIsZeroMeansAutoSize) +{ + OptixRunner runner; + EXPECT_EQ(runner.get_batch_size(), 0u); +} + +TEST(OptixRunnerBatchSize, SetAndGet) +{ + OptixRunner runner; + runner.set_batch_size(500); + EXPECT_EQ(runner.get_batch_size(), 500u); +} + +// Setting back to 0 restores automatic GPU-memory-based sizing. +TEST(OptixRunnerBatchSize, SetZeroRestoresAutoSize) +{ + OptixRunner runner; + runner.set_batch_size(1000); + runner.set_batch_size(0); + EXPECT_EQ(runner.get_batch_size(), 0u); +} + +TEST(OptixRunnerBatchSize, ThrowsOnOverflow) +{ + OptixRunner runner; + const uint_fast64_t too_large = + static_cast(std::numeric_limits::max()) + 1ULL; + EXPECT_THROW(runner.set_batch_size(too_large), std::out_of_range); + // Value should be unchanged after the throw + EXPECT_EQ(runner.get_batch_size(), 0u); +} + +// INT_MAX itself is the largest valid batch size; it must not throw. +TEST(OptixRunnerBatchSize, MaxIntBoundaryDoesNotThrow) +{ + OptixRunner runner; + const uint_fast64_t max_valid = + static_cast(std::numeric_limits::max()); + EXPECT_NO_THROW(runner.set_batch_size(max_valid)); + EXPECT_EQ(runner.get_batch_size(), max_valid); +} + +// A failed set_batch_size must not corrupt a previously stored non-zero value. +TEST(OptixRunnerBatchSize, ThrowPreservesExistingValue) +{ + OptixRunner runner; + runner.set_batch_size(999); + const uint_fast64_t too_large = + static_cast(std::numeric_limits::max()) + 1ULL; + EXPECT_THROW(runner.set_batch_size(too_large), std::out_of_range); + EXPECT_EQ(runner.get_batch_size(), 999u); +} + +// --------------------------------------------------------------------------- +// Simulation correctness: batched run should yield the same hit count as the +// default single-batch run. +// --------------------------------------------------------------------------- + +TEST(OptixRunnerBatchSize, BatchedRunMatchesHitCount) +{ + const int N_rays = 10000; + + // --- reference run (default auto-sized batch: m_batch_size == 0 defers to + // determine_batch_size() / automatic_batch_size()) --- + SimulationData sd_ref; + element_ptr p1_ref, p2_ref; + make_two_plate_sd(sd_ref, p1_ref, p2_ref); + sd_ref.get_simulation_parameters().number_of_rays = N_rays; + sd_ref.get_simulation_parameters().max_number_of_rays = N_rays * 100; + + OptixRunner ref_runner; + ASSERT_EQ(ref_runner.initialize(), RunnerStatus::SUCCESS); + ASSERT_EQ(ref_runner.setup_simulation(&sd_ref), RunnerStatus::SUCCESS); + ASSERT_EQ(ref_runner.run_simulation(), RunnerStatus::SUCCESS); + + SimulationResult ref_result; + ASSERT_EQ(ref_runner.report_simulation(&ref_result, 0), RunnerStatus::SUCCESS); + const int ref_hits = ref_result.get_number_of_records(); + + // --- explicit batched run (user-supplied batch_size = 1000, ~10 iterations) --- + SimulationData sd_batch; + element_ptr p1_batch, p2_batch; + make_two_plate_sd(sd_batch, p1_batch, p2_batch); + sd_batch.get_simulation_parameters().number_of_rays = N_rays; + sd_batch.get_simulation_parameters().max_number_of_rays = N_rays * 100; + + OptixRunner batch_runner; + batch_runner.set_batch_size(1000); + ASSERT_EQ(batch_runner.initialize(), RunnerStatus::SUCCESS); + ASSERT_EQ(batch_runner.setup_simulation(&sd_batch), RunnerStatus::SUCCESS); + ASSERT_EQ(batch_runner.run_simulation(), RunnerStatus::SUCCESS); + + SimulationResult batch_result; + ASSERT_EQ(batch_runner.report_simulation(&batch_result, 0), RunnerStatus::SUCCESS); + const int batch_hits = batch_result.get_number_of_records(); + + EXPECT_EQ(ref_hits, N_rays); + EXPECT_EQ(batch_hits, N_rays); + + // With a user-supplied batch of 1000 and 10000 rays, at least 10 iterations + // are required regardless of available GPU memory. + EXPECT_GE(batch_runner.get_N_run_iterations(), 10u); +} + +// --------------------------------------------------------------------------- +// Batch size smaller than total rays forces multiple iterations. +// --------------------------------------------------------------------------- + +TEST(OptixRunnerBatchSize, SmallBatchMultipleIterations) +{ + const int N_rays = 5000; + const int batch = 500; // 10+ iterations needed + + SimulationData sd; + element_ptr plate1, plate2; + make_two_plate_sd(sd, plate1, plate2); + sd.get_simulation_parameters().number_of_rays = N_rays; + sd.get_simulation_parameters().max_number_of_rays = N_rays * 100; + + OptixRunner runner; + runner.set_batch_size(batch); + ASSERT_EQ(runner.initialize(), RunnerStatus::SUCCESS); + ASSERT_EQ(runner.setup_simulation(&sd), RunnerStatus::SUCCESS); + ASSERT_EQ(runner.run_simulation(), RunnerStatus::SUCCESS); + + SimulationResult result; + ASSERT_EQ(runner.report_simulation(&result, 0), RunnerStatus::SUCCESS); + + EXPECT_EQ(result.get_number_of_records(), N_rays); + + // With a small batch the runner must have generated at least N_rays sun rays + EXPECT_GE(runner.get_N_sun_rays(), static_cast(N_rays)); + + // Multiple iterations must have been needed + EXPECT_GE(runner.get_N_run_iterations(), 10u); +} + +// --------------------------------------------------------------------------- +// Batch size >= N_rays: should complete in exactly one iteration. +// --------------------------------------------------------------------------- + +TEST(OptixRunnerBatchSize, BatchSizeExceedingRaysCompletesInOneIteration) +{ + const int N_rays = 1000; + + SimulationData sd; + element_ptr plate1, plate2; + make_two_plate_sd(sd, plate1, plate2); + sd.get_simulation_parameters().number_of_rays = N_rays; + sd.get_simulation_parameters().max_number_of_rays = N_rays * 100; + + OptixRunner runner; + runner.set_batch_size(N_rays * 8); // larger than N_rays + ASSERT_EQ(runner.initialize(), RunnerStatus::SUCCESS); + ASSERT_EQ(runner.setup_simulation(&sd), RunnerStatus::SUCCESS); + ASSERT_EQ(runner.run_simulation(), RunnerStatus::SUCCESS); + + SimulationResult result; + ASSERT_EQ(runner.report_simulation(&result, 0), RunnerStatus::SUCCESS); + + EXPECT_EQ(result.get_number_of_records(), N_rays); + EXPECT_EQ(runner.get_N_run_iterations(), 1u); +} diff --git a/google-tests/unit-tests/simulation_runner/optix_runner/geometry_intersection_test.cpp b/google-tests/unit-tests/simulation_runner/optix_runner/geometry_intersection_test.cpp index 1f34016c..b1dd3f36 100644 --- a/google-tests/unit-tests/simulation_runner/optix_runner/geometry_intersection_test.cpp +++ b/google-tests/unit-tests/simulation_runner/optix_runner/geometry_intersection_test.cpp @@ -8,12 +8,13 @@ using SolTrace::Runner::RunnerStatus; const double Z_ELEM = 50.0; +const double Z_BACKSTOP = Z_ELEM - 0.5 * Z_ELEM; const double TOL = 1e-6; const uint_fast64_t NRAYS = 10000; -void set_default_sd(SimulationData &sd, - surface_ptr surf, - aperture_ptr ap) +element_id set_default_sd(SimulationData &sd, + surface_ptr surf, + aperture_ptr ap) { sd.clear(); @@ -22,7 +23,7 @@ void set_default_sd(SimulationData &sd, sun->set_position(0, 0, 100); sd.add_ray_source(sun); - // Make reflective flat el + // Make target element element_ptr el = make_element(); el->set_origin(0, 0, Z_ELEM); el->set_aim_vector(0, 0, 100); // Face up towards sun @@ -36,7 +37,20 @@ void set_default_sd(SimulationData &sd, el->set_name("el"); // Add element to stage - sd.add_element(el); + element_id id = sd.add_element(el); + + // Back stop element that is bigger than the created element so that the + // testing element casts a shadow on this big thing. + element_ptr stop = make_element(); + double xlb, xub, ylb, yub; + ap->bounding_box(xlb, xub, ylb, yub); + const double sx = std::max(fabs(xlb), fabs(xub)) + 1.0; + const double sy = std::max(fabs(ylb), fabs(yub)) + 1.0; + stop->set_origin(0, 0, Z_BACKSTOP); + stop->set_aim_vector(0, 0, 100); + stop->set_surface(make_surface()); + stop->set_aperture(make_aperture(sx, sy)); + sd.add_element(stop); // Set parameters SimulationParameters ¶ms = sd.get_simulation_parameters(); @@ -45,6 +59,8 @@ void set_default_sd(SimulationData &sd, params.include_optical_errors = false; params.include_sun_shape_errors = false; params.seed = 123; + + return id; } TEST(OptixRunner, FlatRectangle) @@ -55,7 +71,7 @@ TEST(OptixRunner, FlatRectangle) auto aper = make_aperture(XL, YL); SimulationData sd; - set_default_sd(sd, surf, aper); + element_id test_elid = set_default_sd(sd, surf, aper); SimulationResult result; OptixRunner runner; @@ -77,9 +93,24 @@ TEST(OptixRunner, FlatRectangle) glm::dvec3 p0, p1; rr->get_position(0, p0); rr->get_position(1, p1); + auto id = rr->get_element(1); EXPECT_NEAR(p0[0], p1[0], TOL) << "ray " << i; EXPECT_NEAR(p0[1], p1[1], TOL) << "ray " << i; - EXPECT_NEAR(p1[2], Z_ELEM, TOL * Z_ELEM) << "ray " << i; + + if (id == test_elid) + { + // We hit the test element. Check that the height is as expected + EXPECT_NEAR(p1[2], Z_ELEM, TOL * Z_ELEM) << "ray " << i; + // And that we are in the aperture + EXPECT_TRUE(aper->is_in(p1[0], p1[1])); + } + else + { + // We hit the back stop element. Check that the height is as expected + EXPECT_NEAR(p1[2], Z_BACKSTOP, TOL * Z_ELEM); + // And that we are not in the aperture. + EXPECT_FALSE(aper->is_in(p1[0], p1[1])); + } } } @@ -90,7 +121,7 @@ TEST(OptixRunner, FlatEquilateralTriangle) auto aper = make_aperture(d); SimulationData sd; - set_default_sd(sd, surf, aper); + element_id test_elid = set_default_sd(sd, surf, aper); SimulationResult result; OptixRunner runner; @@ -112,9 +143,20 @@ TEST(OptixRunner, FlatEquilateralTriangle) glm::dvec3 p0, p1; rr->get_position(0, p0); rr->get_position(1, p1); + auto id = rr->get_element(1); EXPECT_NEAR(p0[0], p1[0], TOL) << "ray " << i; EXPECT_NEAR(p0[1], p1[1], TOL) << "ray " << i; - EXPECT_NEAR(p1[2], Z_ELEM, TOL * Z_ELEM) << "ray " << i; + + if (id == test_elid) + { + EXPECT_NEAR(p1[2], Z_ELEM, TOL * Z_ELEM) << "ray " << i; + EXPECT_TRUE(aper->is_in(p1[0], p1[1])); + } + else + { + EXPECT_NEAR(p1[2], Z_BACKSTOP, TOL * Z_ELEM); + EXPECT_FALSE(aper->is_in(p1[0], p1[1])); + } } } @@ -126,7 +168,7 @@ TEST(OptixRunner, FlatTriangle) auto aper = make_aperture(x1, y1, x2, y2, x3, y3); SimulationData sd; - set_default_sd(sd, surf, aper); + element_id test_elid = set_default_sd(sd, surf, aper); SimulationResult result; OptixRunner runner; @@ -148,9 +190,20 @@ TEST(OptixRunner, FlatTriangle) glm::dvec3 p0, p1; rr->get_position(0, p0); rr->get_position(1, p1); + auto id = rr->get_element(1); EXPECT_NEAR(p0[0], p1[0], TOL) << "ray " << i; EXPECT_NEAR(p0[1], p1[1], TOL) << "ray " << i; - EXPECT_NEAR(p1[2], Z_ELEM, TOL * Z_ELEM) << "ray " << i; + + if (id == test_elid) + { + EXPECT_NEAR(p1[2], Z_ELEM, TOL * Z_ELEM) << "ray " << i; + EXPECT_TRUE(aper->is_in(p1[0], p1[1])); + } + else + { + EXPECT_NEAR(p1[2], Z_BACKSTOP, TOL * Z_ELEM); + EXPECT_FALSE(aper->is_in(p1[0], p1[1])); + } } } @@ -164,7 +217,7 @@ TEST(OptixRunner, FlatQuadrilateral) x1, y1, x2, y2, x3, y3, x4, y4); SimulationData sd; - set_default_sd(sd, surf, aper); + element_id test_elid = set_default_sd(sd, surf, aper); SimulationResult result; OptixRunner runner; @@ -186,9 +239,20 @@ TEST(OptixRunner, FlatQuadrilateral) glm::dvec3 p0, p1; rr->get_position(0, p0); rr->get_position(1, p1); + auto id = rr->get_element(1); EXPECT_NEAR(p0[0], p1[0], TOL) << "ray " << i; EXPECT_NEAR(p0[1], p1[1], TOL) << "ray " << i; - EXPECT_NEAR(p1[2], Z_ELEM, TOL * Z_ELEM) << "ray " << i; + + if (id == test_elid) + { + EXPECT_NEAR(p1[2], Z_ELEM, TOL * Z_ELEM) << "ray " << i; + EXPECT_TRUE(aper->is_in(p1[0], p1[1])); + } + else + { + EXPECT_NEAR(p1[2], Z_BACKSTOP, TOL * Z_ELEM); + EXPECT_FALSE(aper->is_in(p1[0], p1[1])); + } } } @@ -203,7 +267,7 @@ TEST(OptixRunner, ParabolaRectangle) auto aper = make_aperture(XL, YL); SimulationData sd; - set_default_sd(sd, surf, aper); + element_id test_elid = set_default_sd(sd, surf, aper); SimulationResult result; OptixRunner runner; @@ -225,10 +289,21 @@ TEST(OptixRunner, ParabolaRectangle) glm::dvec3 p0, p1; rr->get_position(0, p0); rr->get_position(1, p1); + auto id = rr->get_element(1); EXPECT_NEAR(p0[0], p1[0], TOL) << "ray " << i; EXPECT_NEAR(p0[1], p1[1], TOL) << "ray " << i; - const double z1 = Z_ELEM + 0.5 * CX * p1[0] * p1[0] + 0.5 * CY * p1[1] * p1[1]; - EXPECT_NEAR(p1[2], z1, TOL * Z_ELEM) << "ray " << i; + + if (id == test_elid) + { + const double z1 = Z_ELEM + 0.5 * CX * p1[0] * p1[0] + 0.5 * CY * p1[1] * p1[1]; + EXPECT_NEAR(p1[2], z1, TOL * Z_ELEM) << "ray " << i; + EXPECT_TRUE(aper->is_in(p1[0], p1[1])); + } + else + { + EXPECT_NEAR(p1[2], Z_BACKSTOP, TOL * Z_ELEM); + EXPECT_FALSE(aper->is_in(p1[0], p1[1])); + } } } @@ -240,7 +315,7 @@ TEST(OptixRunner, Cylinder) auto aper = make_aperture(2 * R, YL); SimulationData sd; - set_default_sd(sd, surf, aper); + element_id test_elid = set_default_sd(sd, surf, aper); SimulationResult result; OptixRunner runner; @@ -262,9 +337,160 @@ TEST(OptixRunner, Cylinder) glm::dvec3 p0, p1; rr->get_position(0, p0); rr->get_position(1, p1); + auto id = rr->get_element(1); EXPECT_NEAR(p0[0], p1[0], TOL) << "ray " << i; EXPECT_NEAR(p0[1], p1[1], TOL) << "ray " << i; - const double z1 = Z_ELEM + sqrt(R * R - p1[0] * p1[0]); - EXPECT_NEAR(p1[2], z1, TOL * Z_ELEM) << "ray " << i; + + if (id == test_elid) + { + const double z1 = Z_ELEM + sqrt(R * R - p1[0] * p1[0]); + EXPECT_NEAR(p1[2], z1, TOL * Z_ELEM) << "ray " << i; + EXPECT_TRUE(aper->is_in(p1[0], p1[1])); + } + else + { + EXPECT_NEAR(p1[2], Z_BACKSTOP, TOL * Z_ELEM); + EXPECT_FALSE(aper->is_in(p1[0], p1[1])); + } + } +} + +TEST(OptixRunner, FlatCircle) +{ + const double R = 5.0; + auto surf = make_surface(); + auto aper = make_aperture(2 * R); + + SimulationData sd; + element_id test_elid = set_default_sd(sd, surf, aper); + SimulationResult result; + + OptixRunner runner; + RunnerStatus sts = runner.initialize(); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + sts = runner.setup_simulation(&sd); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + sts = runner.run_simulation(); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + sts = runner.report_simulation(&result, 0); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + + ASSERT_EQ(result.get_number_of_records(), + sd.get_simulation_parameters().number_of_rays); + for (int i = 0; i < (int)result.get_number_of_records(); ++i) + { + auto rr = result[i]; + ASSERT_GE(rr->get_number_of_interactions(), 2); + glm::dvec3 p0, p1; + rr->get_position(0, p0); + rr->get_position(1, p1); + auto id = rr->get_element(1); + EXPECT_NEAR(p0[0], p1[0], TOL) << "ray " << i; + EXPECT_NEAR(p0[1], p1[1], TOL) << "ray " << i; + + if (id == test_elid) + { + EXPECT_NEAR(p1[2], Z_ELEM, TOL * Z_ELEM) << "ray " << i; + EXPECT_TRUE(aper->is_in(p1[0], p1[1])); + } + else + { + EXPECT_NEAR(p1[2], Z_BACKSTOP, TOL * Z_ELEM); + EXPECT_FALSE(aper->is_in(p1[0], p1[1])); + } + } +} + +TEST(OptixRunner, FlatHexagon) +{ + const double S = 5.0; + auto surf = make_surface(); + auto aper = make_aperture(2 * S); + + SimulationData sd; + element_id test_elid = set_default_sd(sd, surf, aper); + SimulationResult result; + + OptixRunner runner; + RunnerStatus sts = runner.initialize(); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + sts = runner.setup_simulation(&sd); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + sts = runner.run_simulation(); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + sts = runner.report_simulation(&result, 0); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + + ASSERT_EQ(result.get_number_of_records(), + sd.get_simulation_parameters().number_of_rays); + for (int i = 0; i < (int)result.get_number_of_records(); ++i) + { + auto rr = result[i]; + ASSERT_GE(rr->get_number_of_interactions(), 2); + glm::dvec3 p0, p1; + rr->get_position(0, p0); + rr->get_position(1, p1); + auto id = rr->get_element(1); + EXPECT_NEAR(p0[0], p1[0], TOL) << "ray " << i; + EXPECT_NEAR(p0[1], p1[1], TOL) << "ray " << i; + + if (id == test_elid) + { + EXPECT_NEAR(p1[2], Z_ELEM, TOL * Z_ELEM) << "ray " << i; + EXPECT_TRUE(aper->is_in(p1[0], p1[1])); + } + else + { + EXPECT_NEAR(p1[2], Z_BACKSTOP, TOL * Z_ELEM); + EXPECT_FALSE(aper->is_in(p1[0], p1[1])); + } + } +} + +TEST(OptixRunner, FlatAnnulus) +{ + const double R0 = 5.0; + const double R1 = 10.0; + const double ARC = 2 * PI; + auto surf = make_surface(); + auto aper = make_aperture(R0, R1, ARC); + + SimulationData sd; + element_id test_elid = set_default_sd(sd, surf, aper); + SimulationResult result; + + OptixRunner runner; + RunnerStatus sts = runner.initialize(); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + sts = runner.setup_simulation(&sd); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + sts = runner.run_simulation(); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + sts = runner.report_simulation(&result, 0); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + + ASSERT_EQ(result.get_number_of_records(), + sd.get_simulation_parameters().number_of_rays); + for (int i = 0; i < (int)result.get_number_of_records(); ++i) + { + auto rr = result[i]; + ASSERT_GE(rr->get_number_of_interactions(), 2); + glm::dvec3 p0, p1; + rr->get_position(0, p0); + rr->get_position(1, p1); + auto id = rr->get_element(1); + EXPECT_NEAR(p0[0], p1[0], TOL) << "ray " << i; + EXPECT_NEAR(p0[1], p1[1], TOL) << "ray " << i; + + if (id == test_elid) + { + EXPECT_NEAR(p1[2], Z_ELEM, TOL * Z_ELEM) << "ray " << i; + EXPECT_TRUE(aper->is_in(p1[0], p1[1])); + } + else + { + EXPECT_NEAR(p1[2], Z_BACKSTOP, TOL * Z_ELEM); + EXPECT_FALSE(aper->is_in(p1[0], p1[1])); + } } } diff --git a/google-tests/unit-tests/simulation_runner/optix_runner/gpu_tower_demo.cpp b/google-tests/unit-tests/simulation_runner/optix_runner/gpu_tower_demo.cpp index ea771d5a..f99fb8e8 100644 --- a/google-tests/unit-tests/simulation_runner/optix_runner/gpu_tower_demo.cpp +++ b/google-tests/unit-tests/simulation_runner/optix_runner/gpu_tower_demo.cpp @@ -159,3 +159,80 @@ TEST(GpuTowerDemo, OptixRunnerWithStages) // sts = runner.report_simulation(); ASSERT_EQ(sts, RunnerStatus::SUCCESS); } + +static void setup_tower_sd(SimulationData &sd, uint_fast64_t nrays) +{ + auto sun = make_ray_source(); + sun->set_position(0.0, 0.0, 100.0); + sd.add_ray_source(sun); + + auto absorber = make_element(); + absorber->set_origin(0.0, 0.0, 10.0); + absorber->set_aim_vector(0.0, 5.0, 0.0); + absorber->set_surface(make_surface()); + absorber->set_aperture(make_aperture(2.0, 2.0)); + absorber->get_front_optical_properties()->set_ideal_absorption(); + + auto st1 = make_stage(1); + st1->set_origin(0.0, 0.0, 0.0); + st1->set_aim_vector(0.0, 0.0, 1.0); + st1->add_element(absorber); + + auto st0 = make_stage(0); + st0->set_origin(0.0, 0.0, 0.0); + st0->set_aim_vector(0.0, 0.0, 1.0); + + const double spacing = PI / 4.0; + for (int i = -1; i < 4; ++i) + { + auto el = make_element(); + el->get_front_optical_properties()->reflectivity = 1.0; + + glm::dvec3 pos = {5 * sin(i * spacing), 5 * cos(i * spacing), 0.0}; + el->set_origin(pos); + glm::dvec3 rvec = glm::normalize(absorber->get_origin_global() - pos); + glm::dvec3 svec = glm::normalize(sun->get_position()); + glm::dvec3 avec = 0.5 * rvec + 0.5 * svec; + el->set_aim_vector(pos + 100.0 * avec); + el->set_zrot(30.0 * i); + el->set_surface(make_surface()); + el->set_aperture(make_aperture(1.0, 1.95)); + st0->add_element(el); + } + + sd.add_stage(st0); + sd.add_stage(st1); + + SimulationParameters ¶ms = sd.get_simulation_parameters(); + params.number_of_rays = nrays; + params.max_number_of_rays = nrays * 100; + params.include_optical_errors = false; + params.include_sun_shape_errors = false; + params.seed = 12345; +} + +TEST(OptixRunner, RaysLaunchedEqualsRequestedAfterRun) +{ + const uint_fast64_t NRAYS = 100; + SimulationData sd; + setup_tower_sd(sd, NRAYS); + + OptixRunner runner; + RunnerStatus sts = runner.initialize(); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + sts = runner.setup_simulation(&sd); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + + // Before running, no rays have been launched yet + EXPECT_EQ(runner.get_number_rays_launched(), static_cast(0)); + + sts = runner.run_simulation(); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + + EXPECT_GE(runner.get_number_rays_launched(), NRAYS); + + const uint_fast64_t rays_traced = runner.get_number_rays_traced(); + EXPECT_GT(rays_traced, static_cast(0)); + EXPECT_LE(rays_traced, runner.get_number_rays_launched()); + EXPECT_EQ(rays_traced, NRAYS); +} diff --git a/google-tests/unit-tests/simulation_runner/optix_runner/max_ray_depth_test.cpp b/google-tests/unit-tests/simulation_runner/optix_runner/max_ray_depth_test.cpp new file mode 100644 index 00000000..59702454 --- /dev/null +++ b/google-tests/unit-tests/simulation_runner/optix_runner/max_ray_depth_test.cpp @@ -0,0 +1,234 @@ +#include + +#include +#include +#include +#include +#include // OptixCSP::DEFAULT_MAX_TRACE_DEPTH +#include // OptixCSP::HitType + +using SolTrace::Runner::RunnerStatus; + +// Reuse the two-plate scene defined in two_plate_test.cpp. +// Plate 1 is ideal-reflective; plate 2 is ideal-absorptive. +// Each ray that hits the scene makes exactly 2 element interactions: +// 1. reflect off plate 1 +// 2. absorb on plate 2 +void make_two_plate_sd(SimulationData &sd, element_ptr &plate1, element_ptr &plate2); + +// --------------------------------------------------------------------------- +// set_max_ray_depth / get_max_ray_depth accessor tests (no GPU required) +// --------------------------------------------------------------------------- + +TEST(OptixRunnerMaxRayDepth, DefaultIsDefaultMaxTraceDepth) +{ + OptixRunner runner; + EXPECT_EQ(runner.get_max_ray_depth(), OptixCSP::DEFAULT_MAX_TRACE_DEPTH); +} + +TEST(OptixRunnerMaxRayDepth, SetAndGet) +{ + OptixRunner runner; + runner.set_max_ray_depth(10); + EXPECT_EQ(runner.get_max_ray_depth(), 10u); +} + +// Minimum valid depth is 2; setting exactly 2 must be accepted without clamping. +TEST(OptixRunnerMaxRayDepth, MinimumBoundaryAccepted) +{ + OptixRunner runner; + runner.set_max_ray_depth(2); + EXPECT_EQ(runner.get_max_ray_depth(), 2u); +} + +// Maximum valid depth is 255; setting exactly 255 must be accepted without clamping. +TEST(OptixRunnerMaxRayDepth, MaximumBoundaryAccepted) +{ + OptixRunner runner; + runner.set_max_ray_depth(255); + EXPECT_EQ(runner.get_max_ray_depth(), 255u); +} + +// Depth of 0 is below the minimum; must be clamped to 2 with a warning. +TEST(OptixRunnerMaxRayDepth, ZeroClampsToMinimum) +{ + OptixRunner runner; + testing::internal::CaptureStderr(); + runner.set_max_ray_depth(0); + const std::string output = testing::internal::GetCapturedStderr(); + EXPECT_EQ(runner.get_max_ray_depth(), 2u); + EXPECT_NE(output.find("WARNING"), std::string::npos); +} + +// Depth of 1 is below the minimum; must be clamped to 2 with a warning. +TEST(OptixRunnerMaxRayDepth, OneClampsToMinimum) +{ + OptixRunner runner; + testing::internal::CaptureStderr(); + runner.set_max_ray_depth(1); + const std::string output = testing::internal::GetCapturedStderr(); + EXPECT_EQ(runner.get_max_ray_depth(), 2u); + EXPECT_NE(output.find("WARNING"), std::string::npos); +} + +// Depth of 256 exceeds the maximum; must be clamped to 255 with a warning. +TEST(OptixRunnerMaxRayDepth, ExceedingMaxClampsTo255) +{ + OptixRunner runner; + testing::internal::CaptureStderr(); + runner.set_max_ray_depth(256); + const std::string output = testing::internal::GetCapturedStderr(); + EXPECT_EQ(runner.get_max_ray_depth(), 255u); + EXPECT_NE(output.find("WARNING"), std::string::npos); +} + +// Large values (e.g. passing a big int) must also clamp to 255 with a warning. +TEST(OptixRunnerMaxRayDepth, LargeValueClampsTo255) +{ + OptixRunner runner; + testing::internal::CaptureStderr(); + runner.set_max_ray_depth(10000); + const std::string output = testing::internal::GetCapturedStderr(); + EXPECT_EQ(runner.get_max_ray_depth(), 255u); + EXPECT_NE(output.find("WARNING"), std::string::npos); +} + +// An out-of-range value is always clamped and stored, replacing any previous value. +TEST(OptixRunnerMaxRayDepth, ClampedValueOverwritesPreviousValue) +{ + OptixRunner runner; + runner.set_max_ray_depth(20); + ASSERT_EQ(runner.get_max_ray_depth(), 20u); + + testing::internal::CaptureStderr(); + runner.set_max_ray_depth(0); + const std::string output = testing::internal::GetCapturedStderr(); + EXPECT_EQ(runner.get_max_ray_depth(), 2u); + EXPECT_NE(output.find("WARNING"), std::string::npos); +} + +// Setting a valid depth after an out-of-range call must work normally. +TEST(OptixRunnerMaxRayDepth, ValidSetAfterClamp) +{ + OptixRunner runner; + testing::internal::CaptureStderr(); + runner.set_max_ray_depth(0); + testing::internal::GetCapturedStderr(); + + runner.set_max_ray_depth(8); + EXPECT_EQ(runner.get_max_ray_depth(), 8u); +} + +// --------------------------------------------------------------------------- +// GPU trace test: verify the depth limit is actually enforced during tracing. +// +// The two-plate scene produces exactly 2 element interactions per ray under +// normal conditions (reflect off plate 1, absorb on plate 2). Setting +// max_ray_depth = 2 reduces the maximum interactions per ray to 1, so the +// second interaction is cut off. We verify that no ray in the hit buffer +// carries more than (max_ray_depth - 1) element interactions. +// --------------------------------------------------------------------------- +TEST(OptixRunnerMaxRayDepth, TraceDepthNotExceeded) +{ + SimulationData sd; + element_ptr plate1, plate2; + make_two_plate_sd(sd, plate1, plate2); + + const uint_fast64_t test_max_depth = 2; // allows at most 1 interaction per ray + OptixRunner runner; + runner.set_max_ray_depth(test_max_depth); + ASSERT_EQ(runner.get_max_ray_depth(), static_cast(test_max_depth)); + + RunnerStatus sts = runner.initialize(); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + sts = runner.setup_simulation(&sd); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + sts = runner.run_simulation(); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + + OptixCSP::SolTraceSystem *sys = runner.get_optix_system(); + std::vector hp_vec; + std::vector raynumber_vec; + std::vector element_id_vec; + std::vector hit_type_vec; + sys->get_hp_output(hp_vec, raynumber_vec, element_id_vec, hit_type_vec); + + // Walk through every record. HIT_CREATE marks the start of a new ray; + // all other types are element interactions. No ray may accumulate more + // than (max_ray_depth - 1) interactions. + const uint_fast64_t max_interactions = test_max_depth - 1; + uint_fast64_t current_interactions = 0; + for (uint8_t ht : hit_type_vec) + { + if (ht == OptixCSP::HitType::HIT_CREATE) + { + current_interactions = 0; + } + else + { + ++current_interactions; + EXPECT_LE(current_interactions, max_interactions) + << "A ray exceeded max_ray_depth - 1 element interactions"; + } + } +} + +// --------------------------------------------------------------------------- +// GPU trace tests: verify the depth-exceeded counter behaves correctly. +// --------------------------------------------------------------------------- + +// With max_depth at the default (5), the two-plate scene (reflect→absorb) uses +// only 2 depth slots per ray — well within the limit. The counter must stay 0. +TEST(OptixRunnerMaxRayDepth, DepthExceededCounterIsZeroWhenDepthNotExceeded) +{ + SimulationData sd; + element_ptr plate1, plate2; + make_two_plate_sd(sd, plate1, plate2); + + OptixRunner runner; + // Leave max_ray_depth at the default (5); the scene only needs depth 2. + RunnerStatus sts = runner.initialize(); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + sts = runner.setup_simulation(&sd); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + sts = runner.run_simulation(); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + + EXPECT_EQ(runner.get_N_depth_exceeded_rays(), 0u); +} + +// With max_depth=2, the two-plate scene where BOTH plates are reflective forces +// rays that reach plate 2 to exceed the depth limit (new_depth=2 >= max_depth=2, +// and plate 2 is non-absorbing). Not all reflections from plate 1 geometrically +// reach plate 2, so counter <= n_hit. The key check is that counter > 0 and +// never exceeds the total number of hit rays. +TEST(OptixRunnerMaxRayDepth, DepthExceededCounterCountsTerminatedReflectedRays) +{ + SimulationData sd; + element_ptr plate1, plate2; + make_two_plate_sd(sd, plate1, plate2); + + // Override plate 2 to be reflective so hitting it at max_depth triggers the counter. + plate2->get_front_optical_properties()->set_ideal_reflection(); + plate2->get_back_optical_properties()->set_ideal_reflection(); + + OptixRunner runner; + runner.set_max_ray_depth(2); // max interactions per ray = 1 (plate 1 only) + ASSERT_EQ(runner.get_max_ray_depth(), 2u); + + RunnerStatus sts = runner.initialize(); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + sts = runner.setup_simulation(&sd); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + sts = runner.run_simulation(); + ASSERT_EQ(sts, RunnerStatus::SUCCESS); + + const uint_fast64_t counter = runner.get_N_depth_exceeded_rays(); + const uint_fast64_t n_hit = runner.get_number_rays_traced(); + + // Every ray that reached plate 2 (a subset of the plate 1 hits — + // some reflections miss plate 2 entirely) must have incremented the counter. + EXPECT_GT(counter, 0u) << "Expected depth-exceeded counter to be non-zero"; + EXPECT_LE(counter, n_hit) + << "Counter cannot exceed the total number of hit rays"; +} diff --git a/google-tests/unit-tests/simulation_runner/optix_runner/ray_position_sampling_test.cpp b/google-tests/unit-tests/simulation_runner/optix_runner/ray_position_sampling_test.cpp new file mode 100644 index 00000000..244a9687 --- /dev/null +++ b/google-tests/unit-tests/simulation_runner/optix_runner/ray_position_sampling_test.cpp @@ -0,0 +1,309 @@ +// ray_position_sampling_test.cpp +// +// Tests for sun parallelogram position sampling (GenType::HALTON and +// GenType::RANDOM). Each test fires rays straight down onto a large flat plate +// with no sun-shape or optical errors, then inspects position[0] of every ray +// record — the ray-generation point on the sun parallelogram — to verify the +// spatial distribution. + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +using SolTrace::Runner::RunnerStatus; + +namespace +{ + // ---- Minimal KS-test infrastructure ------------------------------------ + // (duplicated from sun_test.cpp — kept local to this TU) + + static double clamp01(double x) + { + return x <= 0.0 ? 0.0 : (x >= 1.0 ? 1.0 : x); + } + + static double ks_statistic(const std::vector& samples, + const std::function& cdf) + { + if (samples.empty()) return 1.0; + std::vector sorted = samples; + std::sort(sorted.begin(), sorted.end()); + const double n = static_cast(sorted.size()); + double d = 0.0; + for (size_t i = 0; i < sorted.size(); ++i) + { + const double f = clamp01(cdf(sorted[i])); + const double emp_lo = static_cast(i) / n; + const double emp_hi = static_cast(i + 1) / n; + d = std::max(d, std::abs(f - emp_lo)); + d = std::max(d, std::abs(emp_hi - f)); + } + return d; + } + + static double ks_pvalue_asymptotic(double d, size_t n) + { + if (n == 0) return 0.0; + if (d <= 0.0) return 1.0; + const double sqrtn = std::sqrt(static_cast(n)); + const double x = (sqrtn + 0.12 + 0.11 / sqrtn) * d; + double sum = 0.0; + for (int k = 1; k <= 100; ++k) + { + const double term = std::exp(-2.0 * k * k * x * x); + sum += (k % 2 == 1) ? term : -term; + if (term < 1.0e-12) break; + } + return clamp01(2.0 * sum); + } + + static double ks_pvalue(const std::vector& samples, + const std::function& cdf) + { + return ks_pvalue_asymptotic(ks_statistic(samples, cdf), samples.size()); + } + + // Test that `coords` (a 1-D projection of source positions) is consistent + // with a uniform distribution. Coordinates are normalised to [0,1] using + // the empirical range before the KS test. For N >= 1 000 from a true + // uniform distribution the normalisation bias is O(1/N) — negligible + // against the ~1% critical value. + static double ks_pvalue_uniform1d(const std::vector& coords) + { + if (coords.size() < 2) return 0.0; + const double lo = *std::min_element(coords.begin(), coords.end()); + const double hi = *std::max_element(coords.begin(), coords.end()); + if (hi - lo < 1.0e-9) return 0.0; + + std::vector u; + u.reserve(coords.size()); + for (double c : coords) + u.push_back((c - lo) / (hi - lo)); + + return ks_pvalue(u, [](double x) { return x; }); + } + + // ---- Scene helpers ----------------------------------------------------- + + // Build a SimulationData with a large flat plate at z=50 (200 × 200 world + // units), perfectly reflective, no optical or sun-shape errors. The plate + // is intentionally much larger than any realistic sun parallelogram so that + // all generated rays hit it and every source position is recorded. + static void make_large_plate_scene(SimulationData& sd, int seed = 42) + { + sd.clear(); + + auto stage = make_stage(0); + stage->set_origin(0, 0, 0); + stage->set_aim_vector(0, 0, 1); + stage->set_name("stage"); + + auto plate = make_element(); + plate->set_origin(0, 0, 50); + plate->set_aim_vector(0, 0, 100); + plate->set_surface(make_surface()); + plate->set_aperture(make_aperture(200, 200)); + OpticalProperties op(InteractionType::REFLECTION, + DistributionType::NONE, + 0, 1, 0, 0, 0, 0); + plate->set_front_optical_properties(op); + plate->set_back_optical_properties(op); + plate->set_name("plate"); + + stage->add_element(plate); + sd.add_stage(stage); + + SimulationParameters& params = sd.get_simulation_parameters(); + params.number_of_rays = 20000; + params.max_number_of_rays = params.number_of_rays * 10; + params.include_optical_errors = false; + params.include_sun_shape_errors = false; + params.seed = seed; + } + + // Add a sun pointing straight down (position overhead at z=100) to an + // already-configured SimulationData, using the requested gen_type. + static void add_sun(SimulationData& sd, SolTrace::Data::GenType gen_type) + { + auto sun = make_ray_source(); + sun->set_position(0, 0, 100); + // PILLBOX shape with no sun-shape errors keeps rays exactly parallel, + // so hit positions on the plate are the source positions projected down. + sun->set_shape(SolTrace::Data::SunShape::PILLBOX, 0.0, 4.65, 0.0); + sun->set_gen_type(gen_type); + sd.add_ray_source(sun); + } + + // Run the simulation and populate `result`. Returns false on any failure. + static bool run_sim(SimulationData& sd, SimulationResult& result) + { + OptixRunner runner; + if (runner.initialize() != RunnerStatus::SUCCESS) return false; + if (runner.setup_simulation(&sd) != RunnerStatus::SUCCESS) return false; + if (runner.run_simulation() != RunnerStatus::SUCCESS) return false; + if (runner.report_simulation(&result, 0) != RunnerStatus::SUCCESS) return false; + return true; + } + + // Extract the X and Y components of position[0] (the ray-generation point) + // for every ray that has at least one surface interaction recorded. + static void collect_source_xy(const SimulationResult& result, + std::vector& xs, + std::vector& ys) + { + xs.clear(); + ys.clear(); + for (int i = 0; i < result.get_number_of_records(); ++i) + { + auto rec = result[i]; + if (!rec || rec->get_number_of_interactions() < 1) continue; + glm::dvec3 p; + rec->get_position(0, p); + xs.push_back(p[0]); + ys.push_back(p[1]); + } + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +// GenType::RANDOM: the marginal X and Y distributions of source positions +// should be statistically consistent with a uniform distribution over the +// sun parallelogram. +TEST(RayPositionSampling, Random_UniformMarginals) +{ + SimulationData sd; + make_large_plate_scene(sd); + add_sun(sd, SolTrace::Data::GenType::RANDOM); + + SimulationResult result; + ASSERT_TRUE(run_sim(sd, result)); + ASSERT_GT(result.get_number_of_records(), 0); + + std::vector xs, ys; + collect_source_xy(result, xs, ys); + ASSERT_GE(xs.size(), 1000u); + + const double p_x = ks_pvalue_uniform1d(xs); + const double p_y = ks_pvalue_uniform1d(ys); + + EXPECT_GT(p_x, 1.0e-6) << "X marginal deviates significantly from uniform"; + EXPECT_GT(p_y, 1.0e-6) << "Y marginal deviates significantly from uniform"; +} + +// GenType::HALTON: same uniformity requirement, via the Halton low-discrepancy +// sequence (bases 2 and 3 for the two parallelogram axes). +TEST(RayPositionSampling, Halton_UniformMarginals) +{ + SimulationData sd; + make_large_plate_scene(sd); + add_sun(sd, SolTrace::Data::GenType::HALTON); + + SimulationResult result; + ASSERT_TRUE(run_sim(sd, result)); + ASSERT_GT(result.get_number_of_records(), 0); + + std::vector xs, ys; + collect_source_xy(result, xs, ys); + ASSERT_GE(xs.size(), 1000u); + + const double p_x = ks_pvalue_uniform1d(xs); + const double p_y = ks_pvalue_uniform1d(ys); + + EXPECT_GT(p_x, 1.0e-6) << "X marginal deviates significantly from uniform"; + EXPECT_GT(p_y, 1.0e-6) << "Y marginal deviates significantly from uniform"; +} + +// GenType::HALTON is deterministic: the seed field has no effect because the +// sequence depends only on the ray index. Two runs with different seeds must +// produce the same set of source positions. +TEST(RayPositionSampling, Halton_Deterministic) +{ + // Run 1: seed = 1 + SimulationData sd1; + make_large_plate_scene(sd1, /*seed=*/1); + add_sun(sd1, SolTrace::Data::GenType::HALTON); + + SimulationResult r1; + ASSERT_TRUE(run_sim(sd1, r1)); + + // Run 2: seed = 99999 (different, should be ignored by Halton) + SimulationData sd2; + make_large_plate_scene(sd2, /*seed=*/99999); + add_sun(sd2, SolTrace::Data::GenType::HALTON); + + SimulationResult r2; + ASSERT_TRUE(run_sim(sd2, r2)); + + ASSERT_EQ(r1.get_number_of_records(), r2.get_number_of_records()); + + std::vector xs1, ys1, xs2, ys2; + collect_source_xy(r1, xs1, ys1); + collect_source_xy(r2, xs2, ys2); + ASSERT_EQ(xs1.size(), xs2.size()); + + // Sort both and compare element-by-element. + std::sort(xs1.begin(), xs1.end()); + std::sort(xs2.begin(), xs2.end()); + std::sort(ys1.begin(), ys1.end()); + std::sort(ys2.begin(), ys2.end()); + + for (size_t i = 0; i < xs1.size(); ++i) + { + EXPECT_NEAR(xs1[i], xs2[i], 1.0e-4) + << "Halton X differs at sorted index " << i + << " — sequence is not deterministic across seeds"; + EXPECT_NEAR(ys1[i], ys2[i], 1.0e-4) + << "Halton Y differs at sorted index " << i + << " — sequence is not deterministic across seeds"; + } +} + +// GenType::RANDOM IS seed-dependent: two runs with different seeds must +// produce distinguishably different position sets. +TEST(RayPositionSampling, Random_SeedDependent) +{ + SimulationData sd1; + make_large_plate_scene(sd1, /*seed=*/1); + add_sun(sd1, SolTrace::Data::GenType::RANDOM); + + SimulationData sd2; + make_large_plate_scene(sd2, /*seed=*/2); + add_sun(sd2, SolTrace::Data::GenType::RANDOM); + + SimulationResult r1, r2; + ASSERT_TRUE(run_sim(sd1, r1)); + ASSERT_TRUE(run_sim(sd2, r2)); + ASSERT_GT(r1.get_number_of_records(), 0); + ASSERT_GT(r2.get_number_of_records(), 0); + + std::vector xs1, ys1, xs2, ys2; + collect_source_xy(r1, xs1, ys1); + collect_source_xy(r2, xs2, ys2); + ASSERT_FALSE(xs1.empty()); + ASSERT_FALSE(xs2.empty()); + + const double mean_x1 = std::accumulate(xs1.begin(), xs1.end(), 0.0) / xs1.size(); + const double mean_x2 = std::accumulate(xs2.begin(), xs2.end(), 0.0) / xs2.size(); + const double mean_y1 = std::accumulate(ys1.begin(), ys1.end(), 0.0) / ys1.size(); + const double mean_y2 = std::accumulate(ys2.begin(), ys2.end(), 0.0) / ys2.size(); + + // With 20 000 rays and different seeds the probability that both means + // coincide to within 1e-4 is negligible. + const bool x_differs = std::abs(mean_x1 - mean_x2) > 1.0e-4; + const bool y_differs = std::abs(mean_y1 - mean_y2) > 1.0e-4; + + EXPECT_TRUE(x_differs || y_differs) + << "Seeds 1 and 2 produced indistinguishable mean source positions; " + "seed variation may not be wired up for RANDOM gen_type."; +} diff --git a/google-tests/unit-tests/simulation_runner/optix_runner/sun_test.cpp b/google-tests/unit-tests/simulation_runner/optix_runner/sun_test.cpp index c2ff398e..5db70cdf 100644 --- a/google-tests/unit-tests/simulation_runner/optix_runner/sun_test.cpp +++ b/google-tests/unit-tests/simulation_runner/optix_runner/sun_test.cpp @@ -804,3 +804,58 @@ TEST(Sun, UserDefinedSunAngleDistribution) EXPECT_GT(frac_beyond_disc, 0.0); EXPECT_LT(frac_beyond_disc, 0.05); } + +// Verify that a warning is printed to stderr when Halton ray generation is used +// with max_number_of_rays exceeding UINT32_MAX, since the Halton index is truncated +// to 32 bits and positions will repeat. +TEST(Sun, HaltonWarningWhenExceedingUInt32Max) +{ + SimulationData sd; + element_ptr plate; + make_default_sd_sun(sd, plate); + + auto sun = make_ray_source(); + sun->set_position(0, 0, 100); + sun->set_gen_type(SolTrace::Data::GenType::HALTON); + sd.add_ray_source(sun); + + SimulationParameters& params = sd.get_simulation_parameters(); + params.number_of_rays = 1000; + params.max_number_of_rays = static_cast(std::numeric_limits::max()) + 1; + + OptixRunner runner; + testing::internal::CaptureStderr(); + ASSERT_EQ(runner.setup_simulation(&sd), RunnerStatus::SUCCESS); + const std::string output = testing::internal::GetCapturedStderr(); + + EXPECT_NE(output.find("Halton"), std::string::npos) + << "Expected a warning about Halton index overflow in stderr, but got: " << output; +} + +// Verify that no warning is printed when Halton is used with max_number_of_rays +// within the 32-bit range. +TEST(Sun, NoHaltonWarningWithinUInt32Range) +{ + SimulationData sd; + element_ptr plate; + make_default_sd_sun(sd, plate); + + auto sun = make_ray_source(); + sun->set_position(0, 0, 100); + sun->set_gen_type(SolTrace::Data::GenType::HALTON); + sd.add_ray_source(sun); + + SimulationParameters& params = sd.get_simulation_parameters(); + params.number_of_rays = 1000; + params.max_number_of_rays = static_cast(std::numeric_limits::max()); + + OptixRunner runner; + testing::internal::CaptureStderr(); + ASSERT_EQ(runner.setup_simulation(&sd), RunnerStatus::SUCCESS); + const std::string output = testing::internal::GetCapturedStderr(); + + // Checking for "Warning" may result in failure due to other warning. Since + // this should be clean, I consider that to be a good thing. + EXPECT_EQ(output.find("Warning"), std::string::npos) + << "Unexpected Halton warning in stderr: " << output; +} diff --git a/google-tests/unit-tests/simulation_runner/optix_runner/two_plate_test.cpp b/google-tests/unit-tests/simulation_runner/optix_runner/two_plate_test.cpp index 17385bee..b88f5916 100644 --- a/google-tests/unit-tests/simulation_runner/optix_runner/two_plate_test.cpp +++ b/google-tests/unit-tests/simulation_runner/optix_runner/two_plate_test.cpp @@ -151,12 +151,12 @@ TEST(TwoPlateOptix, ReflectionToAbsorber) std::vector element_id_vec; std::vector hit_type_vec; sys->get_hp_output(hp_vec, raynumber_vec, element_id_vec, hit_type_vec); - std::vector sunraynumber_vec = sys->get_sunraynumber_vec(); + // std::vector sunraynumber_vec = sys->get_sunraynumber_vec(); EXPECT_EQ(hp_vec.size(), raynumber_vec.size()); // Hit results are same size EXPECT_EQ(raynumber_vec.size(), element_id_vec.size()); EXPECT_EQ(element_id_vec.size(), hit_type_vec.size()); - EXPECT_EQ(N_sun_rays, sunraynumber_vec.back()); // Reported sun rays is the sun ray id of last hit + // EXPECT_EQ(N_sun_rays, sunraynumber_vec.back()); // Reported sun rays is the sun ray id of last hit EXPECT_TRUE(N_sun_rays <= sd.get_simulation_parameters().max_number_of_rays); // Only generated max number of rays or fewer } @@ -245,4 +245,41 @@ TEST(TwoPlateOptix, SimResults) } } -} \ No newline at end of file +} + +TEST(TwoPlateOptix, TrimExcessRaysOption) +{ + SimulationData sd; + element_ptr plate1, plate2; + make_two_plate_sd(sd, plate1, plate2); + const int n_rays = sd.get_simulation_parameters().number_of_rays; + + // Default: trim enabled — result has exactly n_rays records + { + OptixRunner runner; + EXPECT_TRUE(runner.get_trim_excess_rays()); // default is true + + ASSERT_EQ(runner.initialize(), RunnerStatus::SUCCESS); + ASSERT_EQ(runner.setup_simulation(&sd), RunnerStatus::SUCCESS); + ASSERT_EQ(runner.run_simulation(), RunnerStatus::SUCCESS); + + SimulationResult result; + ASSERT_EQ(runner.report_simulation(&result, 0), RunnerStatus::SUCCESS); + EXPECT_EQ(result.get_number_of_records(), n_rays); + } + + // Trim disabled — result has at least n_rays records (batch overshoot is not removed) + { + OptixRunner runner; + runner.set_trim_excess_rays(false); + EXPECT_FALSE(runner.get_trim_excess_rays()); + + ASSERT_EQ(runner.initialize(), RunnerStatus::SUCCESS); + ASSERT_EQ(runner.setup_simulation(&sd), RunnerStatus::SUCCESS); + ASSERT_EQ(runner.run_simulation(), RunnerStatus::SUCCESS); + + SimulationResult result; + ASSERT_EQ(runner.report_simulation(&result, 0), RunnerStatus::SUCCESS); + EXPECT_GE(result.get_number_of_records(), n_rays); + } +}