Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
8afad2e
Add start of Kokkos implementation
jhdavis8 Jun 29, 2023
fc50bcd
Add compiling Kokkos port, verification fails
jhdavis8 Jul 6, 2023
fb67e69
Small changes to Simulation in Kokkos for easier debugging
jhdavis8 Jul 10, 2023
b1e6fbd
Changes to support CPU only compilation
jhdavis8 Jul 10, 2023
91d542f
Change max num nucs to 0d View
jhdavis8 Jul 10, 2023
a40652b
Fix improper copying of host pointer to device, validation passing
jhdavis8 Jul 10, 2023
c139c2e
Restore max num nucs initialization style
jhdavis8 Jul 10, 2023
7aeaa64
Update clean rule
jhdavis8 Jul 18, 2023
e9f4c89
Adjust Kokkos Makefile to default to Summit options
jhdavis8 Jul 10, 2023
1d849f3
Clean some whitespace, typedef unmanaged views for brevity
jhdavis8 Jul 12, 2023
7bc64e4
Modify timers to align workload in Kokkos
jhdavis8 Jul 12, 2023
903f884
Some makefile changes for Kokkos
jhdavis8 Aug 9, 2023
3eeb92f
feat: add cmake for kokkos port
pranav-sivaraman Aug 15, 2023
c93442e
refactor: rename xsbench excutable to XSBench
pranav-sivaraman Aug 15, 2023
29e08a7
refactor: rename *.hip to *.cpp
pranav-sivaraman Aug 15, 2023
3066671
Add missing install description for kokkos CMake
jhdavis8 Aug 23, 2023
cacfead
Adjust install dir to fix spack compatibility
jhdavis8 Aug 23, 2023
8239f53
refactor: use Kokkos timer instead of openmp to remove openmp dep
pranav-sivaraman Sep 24, 2023
7e6161f
fix: change where timer starts
pranav-sivaraman Sep 24, 2023
275174b
fix: set default thread count to 1
pranav-sivaraman Sep 24, 2023
87c10de
refactor: adjust KOKKOS_PATH in Makefile
pranav-sivaraman Oct 19, 2023
e0b7bff
fix: adjust timer start and end
pranav-sivaraman Oct 19, 2023
3aa0caf
feat: add kokkos model description to README
pranav-sivaraman Oct 19, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ This version of XSBench is written in SYCL, and can be used for CPU, GPU, FPGA,
5. **XSBench/hip**
This version of XSBench is written in HIP for use with GPU architectures. This version is derived from CUDA using an automatic conversion tool with only a few small manual changes.

6. XSBench/kokkos
This version of XSBench is written using the Kokkos programming model allowing it to execute on multiple different GPUs or CPUs. We have provided both Makefile and CMake build system options for convenience

## Compilation

To compile XSBench with default settings, navigate to your selected source directory and use the following command:
Expand Down
26 changes: 26 additions & 0 deletions kokkos/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
cmake_minimum_required(VERSION 3.16)

project(
XSBench_Kokkos
VERSION 1.0
LANGUAGES CXX
)

set(CMAKE_CXX_STANDARD 17)

if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()

set(CMAKE_CXX_FLAGS "-Wall -Wextra")
set(CMAKE_CXX_FLAGS_DEBUG "-g")
set(CMAKE_CXX_FLAGS_RELEASE "-O3")

find_package(Kokkos REQUIRED)

set(SOURCE Main.cpp io.cpp Simulation.cpp GridInit.cpp XSutils.cpp Materials.cpp)

add_executable(XSBench ${SOURCE})
target_link_libraries(XSBench Kokkos::kokkos)

install(TARGETS XSBench DESTINATION bin)
178 changes: 178 additions & 0 deletions kokkos/GridInit.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
// -*- c-basic-offset: 8; tab-width: 8; indent-tabs-mode: t; -*-
#include "XSbench_header.hpp"

SimulationData grid_init_do_not_profile( Inputs in, int mype )
{
// Structure to hold all allocated simuluation data arrays
SimulationData SD;

// Keep track of how much data we're allocating
size_t nbytes = 0;

// Set the initial seed value
uint64_t seed = 42;

////////////////////////////////////////////////////////////////////
// Initialize Nuclide Grids
////////////////////////////////////////////////////////////////////

if(mype == 0) printf("Intializing nuclide grids...\n");

// First, we need to initialize our nuclide grid. This comes in the form
// of a flattened 2D array that hold all the information we need to define
// the cross sections for all isotopes in the simulation.
// The grid is composed of "NuclideGridPoint" structures, which hold the
// energy level of the grid point and all associated XS data at that level.
// An array of structures (AOS) is used instead of
// a structure of arrays, as the grid points themselves are accessed in
// a random order, but all cross section interaction channels and the
// energy level are read whenever the gridpoint is accessed, meaning the
// AOS is more cache efficient.

// Initialize Nuclide Grid
SD.length_nuclide_grid = in.n_isotopes * in.n_gridpoints;
SD.nuclide_grid = (NuclideGridPoint *) malloc( SD.length_nuclide_grid * sizeof(NuclideGridPoint));
assert(SD.nuclide_grid != NULL);
nbytes += SD.length_nuclide_grid * sizeof(NuclideGridPoint);
for( int i = 0; i < SD.length_nuclide_grid; i++ )
{
SD.nuclide_grid[i].energy = LCG_random_double(&seed);
SD.nuclide_grid[i].total_xs = LCG_random_double(&seed);
SD.nuclide_grid[i].elastic_xs = LCG_random_double(&seed);
SD.nuclide_grid[i].absorbtion_xs = LCG_random_double(&seed);
SD.nuclide_grid[i].fission_xs = LCG_random_double(&seed);
SD.nuclide_grid[i].nu_fission_xs = LCG_random_double(&seed);
}

// Sort so that each nuclide has data stored in ascending energy order.
for( int i = 0; i < in.n_isotopes; i++ )
qsort( &SD.nuclide_grid[i*in.n_gridpoints], in.n_gridpoints, sizeof(NuclideGridPoint), NGP_compare);

// error debug check
/*
for( int i = 0; i < in.n_isotopes; i++ )
{
printf("NUCLIDE %d ==============================\n", i);
for( int j = 0; j < in.n_gridpoints; j++ )
printf("E%d = %lf\n", j, SD.nuclide_grid[i * in.n_gridpoints + j].energy);
}
*/


////////////////////////////////////////////////////////////////////
// Initialize Acceleration Structure
////////////////////////////////////////////////////////////////////

if( in.grid_type == NUCLIDE )
{
SD.length_unionized_energy_array = 0;
SD.length_index_grid = 0;
}

if( in.grid_type == UNIONIZED )
{
if(mype == 0) printf("Intializing unionized grid...\n");

// Allocate space to hold the union of all nuclide energy data
SD.length_unionized_energy_array = in.n_isotopes * in.n_gridpoints;
SD.unionized_energy_array = (double *) malloc( SD.length_unionized_energy_array * sizeof(double));
assert(SD.unionized_energy_array != NULL );
nbytes += SD.length_unionized_energy_array * sizeof(double);

// Copy energy data over from the nuclide energy grid
for( int i = 0; i < SD.length_unionized_energy_array; i++ )
SD.unionized_energy_array[i] = SD.nuclide_grid[i].energy;

// Sort unionized energy array
qsort( SD.unionized_energy_array, SD.length_unionized_energy_array, sizeof(double), double_compare);

// Allocate space to hold the acceleration grid indices
SD.length_index_grid = SD.length_unionized_energy_array * in.n_isotopes;
SD.index_grid = (int *) malloc( SD.length_index_grid * sizeof(int));
assert(SD.index_grid != NULL);
nbytes += SD.length_index_grid * sizeof(int);

// Generates the double indexing grid
int * idx_low = (int *) calloc( in.n_isotopes, sizeof(int));
assert(idx_low != NULL );
double * energy_high = (double *) malloc( in.n_isotopes * sizeof(double));
assert(energy_high != NULL );

for( int i = 0; i < in.n_isotopes; i++ )
energy_high[i] = SD.nuclide_grid[i * in.n_gridpoints + 1].energy;

for( long e = 0; e < SD.length_unionized_energy_array; e++ )
{
double unionized_energy = SD.unionized_energy_array[e];
for( long i = 0; i < in.n_isotopes; i++ )
{
if( unionized_energy < energy_high[i] )
SD.index_grid[e * in.n_isotopes + i] = idx_low[i];
else if( idx_low[i] == in.n_gridpoints - 2 )
SD.index_grid[e * in.n_isotopes + i] = idx_low[i];
else
{
idx_low[i]++;
SD.index_grid[e * in.n_isotopes + i] = idx_low[i];
energy_high[i] = SD.nuclide_grid[i * in.n_gridpoints + idx_low[i] + 1].energy;
}
}
}

free(idx_low);
free(energy_high);
}

if( in.grid_type == HASH )
{
if(mype == 0) printf("Intializing hash grid...\n");
SD.length_unionized_energy_array = 0;
SD.length_index_grid = in.hash_bins * in.n_isotopes;
SD.index_grid = (int *) malloc( SD.length_index_grid * sizeof(int));
assert(SD.index_grid != NULL);
nbytes += SD.length_index_grid * sizeof(int);

double du = 1.0 / in.hash_bins;

// For each energy level in the hash table
#pragma omp parallel for
for( long e = 0; e < in.hash_bins; e++ )
{
double energy = e * du;

// We need to determine the bounding energy levels for all isotopes
for( long i = 0; i < in.n_isotopes; i++ )
{
SD.index_grid[e * in.n_isotopes + i] = grid_search_nuclide_old( in.n_gridpoints, energy, SD.nuclide_grid + i * in.n_gridpoints, 0, in.n_gridpoints-1);
}
}
}

////////////////////////////////////////////////////////////////////
// Initialize Materials and Concentrations
////////////////////////////////////////////////////////////////////
if(mype == 0) printf("Intializing material data...\n");

// Set the number of nuclides in each material
SD.num_nucs = load_num_nucs(in.n_isotopes);
SD.length_num_nucs = 12; // There are always 12 materials in XSBench

// Intialize the flattened 2D grid of material data. The grid holds
// a list of nuclide indices for each of the 12 material types. The
// grid is allocated as a full square grid, even though not all
// materials have the same number of nuclides.
SD.mats = load_mats(SD.num_nucs, in.n_isotopes, &SD.max_num_nucs);
SD.length_mats = SD.length_num_nucs * SD.max_num_nucs;

// Intialize the flattened 2D grid of nuclide concentration data. The grid holds
// a list of nuclide concentrations for each of the 12 material types. The
// grid is allocated as a full square grid, even though not all
// materials have the same number of nuclides.
SD.concs = load_concs(SD.num_nucs, SD.max_num_nucs);
SD.length_concs = SD.length_mats;

if(mype == 0) printf("Intialization complete. Allocated %.0lf MB of data.\n", nbytes/1024.0/1024.0 );

return SD;

}
118 changes: 118 additions & 0 deletions kokkos/Main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// -*- c-basic-offset: 8; tab-width: 8; indent-tabs-mode: t; -*-
#include "XSbench_header.hpp"

#ifdef MPI
#include<mpi.h>
#endif

int main( int argc, char* argv[] )
{
// =====================================================================
// Initialization & Command Line Read-In
// =====================================================================
int version = 20;
int mype = 0;
int nprocs = 1;
unsigned long long verification;

#ifdef MPI
MPI_Status stat;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &mype);
#endif

// Start Kokkos
Kokkos::initialize();

// Process CLI Fields -- store in "Inputs" structure
Inputs in = read_CLI( argc, argv );

// Set number of OpenMP Threads
//omp_set_num_threads(in.nthreads);

// Print-out of Input Summary
if( mype == 0 )
print_inputs( in, nprocs, version );

// =====================================================================
// Prepare Nuclide Energy Grids, Unionized Energy Grid, & Material Data
// This is not reflective of a real Monte Carlo simulation workload,
// therefore, do not profile this region!
// =====================================================================

SimulationData SD;

// If read from file mode is selected, skip initialization and load
// all simulation data structures from file instead
if( in.binary_mode == READ )
SD = binary_read(in);
else
SD = grid_init_do_not_profile( in, mype );

// If writing from file mode is selected, write all simulation data
// structures to file
if( in.binary_mode == WRITE && mype == 0 )
binary_write(in, SD);


// =====================================================================
// Cross Section (XS) Parallel Lookup Simulation
// This is the section that should be profiled, as it reflects a
// realistic continuous energy Monte Carlo macroscopic cross section
// lookup kernel.
// =====================================================================

if( mype == 0 )
{
printf("\n");
border_print();
center_print("SIMULATION", 79);
border_print();
}

// Start Simulation Timer

double elapsed_time = 0;

// Run simulation
if( in.simulation_method == EVENT_BASED )
{
if( in.kernel_id == 0 )
verification = run_event_based_simulation(in, SD, mype, &elapsed_time);
else
{
printf("Error: No kernel ID %d found!\n", in.kernel_id);
exit(1);
}
}
else
{
printf("History-based simulation not implemented in Kokkos code. Instead,\nuse the event-based method with \"-m event\" argument.\n");
exit(1);
}

if( mype == 0)
{
printf("\n" );
printf("Simulation complete.\n" );
}

// =====================================================================
// Output Results & Finalize
// =====================================================================

// Final Hash Step
verification = verification % 999983;

// Print / Save Results and Exit
int is_invalid_result = print_results( in, mype, elapsed_time, nprocs, verification );

Kokkos::finalize();

#ifdef MPI
MPI_Finalize();
#endif

return is_invalid_result;
}
Loading