From 20b602b006dbb9a7d8f561fef9bdbd2d631a2e19 Mon Sep 17 00:00:00 2001 From: stefanvanberkum <40003032+stefanvanberkum@users.noreply.github.com> Date: Mon, 21 Aug 2023 16:39:58 +0200 Subject: [PATCH] Added PyTorch Geometric functionality. --- bindings/pyroot/pythonizations/CMakeLists.txt | 2 + .../ROOT/_pythonization/_tmva/__init__.py | 2 + .../ROOT/_pythonization/_tmva/_torchgnn.py | 35 ++ tmva/sofie/CMakeLists.txt | 15 + tmva/sofie/inc/LinkDef.h | 2 + .../inc/TMVA/TorchGNN/RModel_TorchGNN.hxx | 203 ++++++++++++ .../inc/TMVA/TorchGNN/modules/RModule.hxx | 122 +++++++ .../inc/TMVA/TorchGNN/modules/RModule_Add.hxx | 97 ++++++ .../inc/TMVA/TorchGNN/modules/RModule_Cat.hxx | 247 ++++++++++++++ .../TMVA/TorchGNN/modules/RModule_GCNConv.hxx | 308 ++++++++++++++++++ .../modules/RModule_GlobalMeanPool.hxx | 130 ++++++++ .../TMVA/TorchGNN/modules/RModule_Input.hxx | 116 +++++++ .../TMVA/TorchGNN/modules/RModule_Linear.hxx | 231 +++++++++++++ .../TMVA/TorchGNN/modules/RModule_ReLU.hxx | 98 ++++++ .../TMVA/TorchGNN/modules/RModule_Reshape.hxx | 136 ++++++++ .../TMVA/TorchGNN/modules/RModule_Softmax.hxx | 107 ++++++ .../modules/RModule_SparseGCNConv.hxx | 285 ++++++++++++++++ tmva/sofie/src/TorchGNN/RModel_TorchGNN.cxx | 294 +++++++++++++++++ tmva/sofie/test/TorchGNN/AddTest.cxx | 73 +++++ tmva/sofie/test/TorchGNN/CatTest.cxx | 128 ++++++++ .../test/TorchGNN/GlobalMeanPoolTest.cxx | 47 +++ tmva/sofie/test/TorchGNN/LinearTest.cxx | 52 +++ tmva/sofie/test/TorchGNN/ReLUTest.cxx | 35 ++ tmva/sofie/test/TorchGNN/ReshapeTest.cxx | 64 ++++ tmva/sofie/test/TorchGNN/SaveTest.cxx | 25 ++ tmva/sofie/test/TorchGNN/SoftmaxTest.cxx | 47 +++ 26 files changed, 2901 insertions(+) create mode 100644 bindings/pyroot/pythonizations/python/ROOT/_pythonization/_tmva/_torchgnn.py create mode 100644 tmva/sofie/inc/TMVA/TorchGNN/RModel_TorchGNN.hxx create mode 100644 tmva/sofie/inc/TMVA/TorchGNN/modules/RModule.hxx create mode 100644 tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Add.hxx create mode 
100644 tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Cat.hxx create mode 100644 tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_GCNConv.hxx create mode 100644 tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_GlobalMeanPool.hxx create mode 100644 tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Input.hxx create mode 100644 tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Linear.hxx create mode 100644 tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_ReLU.hxx create mode 100644 tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Reshape.hxx create mode 100644 tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Softmax.hxx create mode 100644 tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_SparseGCNConv.hxx create mode 100644 tmva/sofie/src/TorchGNN/RModel_TorchGNN.cxx create mode 100644 tmva/sofie/test/TorchGNN/AddTest.cxx create mode 100644 tmva/sofie/test/TorchGNN/CatTest.cxx create mode 100644 tmva/sofie/test/TorchGNN/GlobalMeanPoolTest.cxx create mode 100644 tmva/sofie/test/TorchGNN/LinearTest.cxx create mode 100644 tmva/sofie/test/TorchGNN/ReLUTest.cxx create mode 100644 tmva/sofie/test/TorchGNN/ReshapeTest.cxx create mode 100644 tmva/sofie/test/TorchGNN/SaveTest.cxx create mode 100644 tmva/sofie/test/TorchGNN/SoftmaxTest.cxx diff --git a/bindings/pyroot/pythonizations/CMakeLists.txt b/bindings/pyroot/pythonizations/CMakeLists.txt index 57c12becf9c12..7eea45d1925e2 100644 --- a/bindings/pyroot/pythonizations/CMakeLists.txt +++ b/bindings/pyroot/pythonizations/CMakeLists.txt @@ -65,6 +65,8 @@ if(tmva) ROOT/_pythonization/_tmva/_rtensor.py ROOT/_pythonization/_tmva/_tree_inference.py ROOT/_pythonization/_tmva/_utils.py) + list(APPEND PYROOT_EXTRA_PY3_SOURCE + ROOT/_pythonization/_tmva/_torchgnn.py) endif() if(PYTHON_VERSION_STRING_Development_Main VERSION_GREATER_EQUAL 3.8 AND dataframe) diff --git a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_tmva/__init__.py b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_tmva/__init__.py index 0c8fb8a3d131c..47a9ecca5a32a 100644 --- 
a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_tmva/__init__.py +++ b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_tmva/__init__.py @@ -22,6 +22,8 @@ from ._rbdt import Compute, pythonize_rbdt +from ._torchgnn import RModel_TorchGNN + if sys.version_info >= (3, 8): from ._batchgenerator import ( CreateNumPyGenerators, diff --git a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_tmva/_torchgnn.py b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_tmva/_torchgnn.py new file mode 100644 index 0000000000000..410343c9e3733 --- /dev/null +++ b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_tmva/_torchgnn.py @@ -0,0 +1,35 @@ +""" +Helper functions for Python TorchGNN. + +Author: Stefan van Berkum +""" + +from .. import pythonization +from cppyy.gbl.std import vector, map + + +class RModel_TorchGNN(): + """Holder for Python helper methods. + + ExtractParameters is copied onto the class handled by the + "RModel_TorchGNN" pythonization defined at the end of this file. + """ + + def ExtractParameters(self, model): + """Extract the parameters from a PyTorch model. + + In order for this to work, the parameterized module names in ROOT should + be the same as those in the PyTorch state dictionary, which is named + after the class attributes. + For example: + Torch: self.linear_1 = torch.nn.Linear(5, 20) + ROOT: model.AddModule(ROOT.TMVA.Experimental.SOFIE.RModule_Linear('X', + 5, 20), 'linear_1') + + Every entry of model.state_dict() is moved to the CPU, flattened and + passed on to self.LoadParameters under its state-dictionary key. + + :param model: The PyTorch model. + """ + + # Transform Python dictionary to C++ map and load parameters. 
+ # NOTE: "map" is the C++ std::map imported above, not the Python builtin. + m = map[str, vector[float]]() + for key, value in model.state_dict().items(): + # Flatten each tensor to a one-dimensional Python list. + m[key] = value.cpu().numpy().flatten().tolist() + self.LoadParameters(m) + + +@pythonization("RModel_TorchGNN", ns="TMVA::Experimental::SOFIE") +def pythonize_torchgnn_extractparameters(klass): + """Attach ExtractParameters to the pythonized class. + + :param klass: The class object that receives the ExtractParameters attribute. + """ + setattr(klass, "ExtractParameters", RModel_TorchGNN.ExtractParameters) diff --git a/tmva/sofie/CMakeLists.txt b/tmva/sofie/CMakeLists.txt index e29db7129429f..fb5640270a8cd 100644 --- a/tmva/sofie/CMakeLists.txt +++ b/tmva/sofie/CMakeLists.txt @@ -44,9 +44,24 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie TMVA/ROperator_Erf.hxx TMVA/SOFIE_common.hxx TMVA/SOFIEHelpers.hxx + + TMVA/TorchGNN/modules/RModule_Add.hxx + TMVA/TorchGNN/modules/RModule_Cat.hxx + TMVA/TorchGNN/modules/RModule_GCNConv.hxx + TMVA/TorchGNN/modules/RModule_GlobalMeanPool.hxx + TMVA/TorchGNN/modules/RModule_Input.hxx + TMVA/TorchGNN/modules/RModule_Linear.hxx + TMVA/TorchGNN/modules/RModule_ReLU.hxx + TMVA/TorchGNN/modules/RModule_Reshape.hxx + TMVA/TorchGNN/modules/RModule_Softmax.hxx + TMVA/TorchGNN/modules/RModule.hxx + + TMVA/TorchGNN/RModel_TorchGNN.hxx SOURCES src/RModel.cxx src/SOFIE_common.cxx + + src/TorchGNN/RModel_TorchGNN.cxx DEPENDENCIES TMVA ) diff --git a/tmva/sofie/inc/LinkDef.h b/tmva/sofie/inc/LinkDef.h index 3282ff22c1d32..553b750738ac9 100644 --- a/tmva/sofie/inc/LinkDef.h +++ b/tmva/sofie/inc/LinkDef.h @@ -14,5 +14,7 @@ #pragma link C++ struct TMVA::Experimental::SOFIE::TensorInfo+; #pragma link C++ struct TMVA::Experimental::SOFIE::InputTensorInfo+; #pragma link C++ struct TMVA::Experimental::SOFIE::Dim+; +#pragma link C++ class TMVA::Experimental::SOFIE::RModule+; +#pragma link C++ class TMVA::Experimental::SOFIE::RModel_TorchGNN+; #endif diff 
--git a/tmva/sofie/inc/TMVA/TorchGNN/RModel_TorchGNN.hxx b/tmva/sofie/inc/TMVA/TorchGNN/RModel_TorchGNN.hxx new file mode 100644 index 0000000000000..f91ea428570ee --- /dev/null +++ b/tmva/sofie/inc/TMVA/TorchGNN/RModel_TorchGNN.hxx @@ -0,0 +1,203 @@ +// 
@(#)root/tmva/sofie:$Id$
+// Author: Stefan van Berkum
+
+/**
+ * Header file for PyTorch Geometric models.
+ *
+ * Models are created by the user and parameters can then be loaded into each layer.
+ *
+ * IMPORTANT: Changes to the format (e.g., namespaces) may affect the emit
+ * defined in RModel_TorchGNN.cxx (save).
+*/
+
+#ifndef TMVA_SOFIE_RMODEL_TORCHGNN_H_
+#define TMVA_SOFIE_RMODEL_TORCHGNN_H_
+
+#include "TMVA/TorchGNN/modules/RModule.hxx"
+#include "TMVA/TorchGNN/modules/RModule_Input.hxx"
+#include <algorithm>
+#include <cstddef>
+#include <ctime>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace TMVA {
+namespace Experimental {
+namespace SOFIE {
+
+class RModel_TorchGNN {
+    public:
+        /** Model constructor without inputs. */
+        RModel_TorchGNN() {}
+
+        /**
+         * Model constructor with manual input names.
+         *
+         * One input module is added per name, in the given order.
+         *
+         * @param input_names Vector of input names.
+         * @param input_shapes Vector of input shapes. Each element may contain
+         * at most one wildcard (-1).
+         * @throws std::invalid_argument If the number of names and shapes
+         * differ, or if a shape contains a zero, an entry below -1, or more
+         * than one wildcard.
+         */
+        RModel_TorchGNN(std::vector<std::string> input_names, std::vector<std::vector<int>> input_shapes) {
+            // Every name needs its own shape; otherwise the loop below would
+            // read past the end of input_shapes.
+            if (input_names.size() != input_shapes.size()) {
+                throw std::invalid_argument("Got " + std::to_string(input_names.size()) + " input names but " + std::to_string(input_shapes.size()) + " input shapes.");
+            }
+
+            fInputs = input_names;
+            fShapes = input_shapes;
+
+            // Generate input layers.
+            for (std::size_t i = 0; i < input_names.size(); i++) {
+                const std::vector<int>& shape = input_shapes[i];
+
+                // Check shape.
+                if (std::any_of(shape.begin(), shape.end(), [](int j){return j == 0;})) {
+                    throw std::invalid_argument("Invalid input shape for input " + input_names[i] + ". Dimension cannot be zero.");
+                }
+                if (std::any_of(shape.begin(), shape.end(), [](int j){return j < -1;})) {
+                    throw std::invalid_argument("Invalid input shape for input " + input_names[i] + ". Shape cannot have negative entries (except for the wildcard dimension).");
+                }
+                if (std::count(shape.begin(), shape.end(), -1) > 1) {
+                    throw std::invalid_argument("Invalid input shape for input " + input_names[i] + ". Shape may have at most one wildcard.");
+                }
+                AddModule(RModule_Input(input_shapes[i]), input_names[i]);
+            }
+        }
+
+        /**
+         * Add a module to the module list.
+         *
+         * If the requested name is already taken, a discriminator is appended
+         * (name_1, name_2, ...) until the name is unique. A warning is printed
+         * when an explicitly given name had to be changed.
+         *
+         * @param module Module to add.
+         * @param name Module name. Defaults to the module type with a count
+         * value (e.g., GCNConv_1).
+         */
+        template<typename T>
+        void AddModule(T module, std::string name="") {
+            const std::string base_name = name.empty() ? std::string(module.GetOperation()) : name;
+            std::string new_name = base_name;
+
+            // The counter is kept per *base* name. Counting on the renamed key
+            // would hand out the same discriminator again for the next
+            // duplicate. References into std::map stay valid on insertion.
+            int& count = fModuleCounts[base_name];
+            if (count > 0) {
+                // Module exists, so add discriminator and increment count.
+                // Skip discriminators that collide with an existing module.
+                do {
+                    new_name = base_name + "_" + std::to_string(count++);
+                } while (fModuleMap.count(new_name) > 0);
+
+                // Reserve the generated name so that a later module asking
+                // for exactly this name is renamed as well.
+                fModuleCounts[new_name] = 1;
+
+                if (!name.empty()) {
+                    // Issue warning.
+                    std::cout << "WARNING: Module with duplicate name \"" << name << "\" renamed to \"" << new_name << "\"." << std::endl;
+                }
+            } else {
+                // First module of its kind.
+                count = 1;
+            }
+            module.SetName(new_name);
+
+            // Initialize the module.
+            module.Initialize(fModules, fModuleMap);
+
+            // Add module to the module list.
+            fModules.push_back(std::make_shared<T>(module));
+            fModuleMap[std::string(module.GetName())] = fModuleCount;
+            fModuleCount++;
+        }
+
+        /**
+         * Run the forward function.
+         *
+         * The i-th argument is handed to the i-th module in the module list,
+         * which therefore has to be an input module.
+         *
+         * @param args Any number of input arguments.
+         * @returns The output of the last layer.
+         * @throws std::invalid_argument If an argument has no matching input
+         * module.
+         * @throws std::runtime_error If the model contains no modules.
+         */
+        template<class... Types>
+        std::vector<float> Forward(Types... args) {
+            if (fModules.empty()) {
+                throw std::runtime_error("Cannot run a model without modules.");
+            }
+
+            // Instantiate input layers.
+            std::size_t k = 0;
+            auto set_input = [&](auto& in) {
+                if (k >= fModules.size()) {
+                    throw std::invalid_argument("More inputs provided than modules in the model.");
+                }
+                std::shared_ptr<RModule_Input> input_module = std::dynamic_pointer_cast<RModule_Input>(fModules[k]);
+                if (!input_module) {
+                    throw std::invalid_argument("Module " + std::string(fModules[k] -> GetName()) + " is not an input module.");
+                }
+                k++;
+                input_module -> SetParams(in);
+            };
+            (set_input(args), ...);
+
+            // Loop through and execute modules.
+            for (const std::shared_ptr<RModule>& module: fModules) {
+                module -> Execute();
+            }
+
+            // Return (a copy of) the output of the last layer.
+            return fModules.back() -> GetOutput();
+        }
+
+        /**
+         * Load parameters from PyTorch state dictionary for all modules.
+         *
+         * @param state_dict The state dictionary.
+         */
+        void LoadParameters(std::map<std::string, std::vector<float>> state_dict) {
+            for (const std::shared_ptr<RModule>& module: fModules) {
+                module -> LoadParameters(state_dict);
+            }
+        }
+
+        /**
+         * Load saved parameters for all modules.
+         */
+        void LoadParameters() {
+            for (const std::shared_ptr<RModule>& module: fModules) {
+                module -> LoadParameters();
+            }
+        }
+
+        /**
+         * Save the model as standalone inference code.
+         *
+         * @param path Path to save location.
+         * @param name Model name.
+         * @param overwrite True if any existing directory should be
+         * overwritten. Defaults to false.
+         */
+        void Save(std::string path, std::string name, bool overwrite=false);
+    private:
+        /**
+         * Get a timestamp.
+         *
+         * @returns The timestamp in string format.
+         */
+        static std::string GetTimestamp() {
+            time_t rawtime;
+            struct tm * timeinfo;
+            char timestamp [80];
+            time(&rawtime);
+            timeinfo = localtime(&rawtime);
+            strftime(timestamp, 80, "Timestamp: %d-%m-%Y %T.", timeinfo);
+            return timestamp;
+        }
+
+        /**
+         * Write the methods to create a self-contained package.
+         *
+         * @param dir Directory to save to.
+         * @param name Model name.
+         * @param timestamp Timestamp.
+         */
+        void WriteMethods(std::string dir, std::string name, std::string timestamp);
+
+        /**
+         * Write the model to a file.
+         *
+         * @param dir Directory to save to.
+         * @param name Model name.
+         * @param timestamp Timestamp.
+         */
+        void WriteModel(std::string dir, std::string name, std::string timestamp);
+
+        /**
+         * Write the CMakeLists file.
+         *
+         * @param dir Directory to save to.
+         * @param name Model name.
+         * @param timestamp Timestamp.
+         */
+        void WriteCMakeLists(std::string dir, std::string name, std::string timestamp);
+
+        std::vector<std::string> fInputs;  // Vector of input names.
+        std::vector<std::vector<int>> fShapes;  // Vector of input shapes.
+        std::map<std::string, int> fModuleCounts;  // Map from module name to number of occurrences.
+        std::vector<std::shared_ptr<RModule>> fModules;  // Vector containing the modules.
+        std::map<std::string, int> fModuleMap;  // Map from module name to module index (in modules).
+        int fModuleCount = 0;  // Number of modules.
+};
+
+}  // SOFIE.
+}  // Experimental.
+}  // TMVA.
+
+#endif  // TMVA_SOFIE_RMODEL_TORCHGNN_H_
diff --git a/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule.hxx b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule.hxx
new file mode 100644
index 0000000000000..4ed5c68c7cf96
--- /dev/null
+++ b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule.hxx
@@ -0,0 +1,122 @@
+// @(#)root/tmva/sofie:$Id$
+// Author: Stefan van Berkum
+
+/**
+ * Base class for RModule objects used in GNNs.
+ *
+ * Modules define the operations that can be performed in a forward pass. They
+ * can be layers, activations, or generic operations. Each module defines a
+ * forward method that updates fOutput (the output of the last call).
+ *
+ * IMPORTANT: Besides the virtual methods, each RModule should assign its inputs
+ * to the class variable "fInputs" and other arguments to the class variable
+ * "fArgs" (in string format).
+ * IMPORTANT: Changes to the format (e.g., namespaces) may affect the emit
+ * defined in RModel_TorchGNN.cxx (save). To be safe, new modules should closely
+ * follow the format of the existing modules (i.e., copy-paste and edit).
+*/
+
+#ifndef TMVA_SOFIE_RMODULE_H_
+#define TMVA_SOFIE_RMODULE_H_
+
+#include <cstddef>
+#include <map>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <string_view>
+#include <vector>
+
+namespace TMVA {
+namespace Experimental {
+namespace SOFIE {
+
+class RModule {
+    public:
+        virtual ~RModule() = default;
+
+        /**
+         * Initialize the RModule by binding it to its input modules.
+         *
+         * Every name in fInputs is looked up in module_map and the module at
+         * that index is appended to the list of input modules.
+         *
+         * @param module_list Vector containing the modules.
+         * @param module_map Map from module name to module index (in modules).
+         * @throws std::invalid_argument If an input name is not present in
+         * module_map or refers to an index outside module_list.
+         */
+        void Initialize(const std::vector<std::shared_ptr<RModule>>& module_list, const std::map<std::string, int>& module_map) {
+            for (const std::string& input: fInputs) {
+                // Use find() instead of operator[]: the latter would silently
+                // map an unknown name to index zero.
+                const auto entry = module_map.find(input);
+                if (entry == module_map.end() || entry -> second < 0 || static_cast<std::size_t>(entry -> second) >= module_list.size()) {
+                    throw std::invalid_argument("Module \"" + fName + "\" refers to unknown input module \"" + input + "\".");
+                }
+                fInputModules.push_back(module_list[entry -> second]);
+            }
+        }
+
+        /**
+         * Execute the module.
+         *
+         * This triggers the module's forward method and stores the output and
+         * its shape. The shape is inferred before the forward method runs.
+         */
+        void Execute() {
+            fOutShape = InferShape();  // Infer shape on the fly. TODO: Test impact on performance and possibly execute once for static models.
+            Forward();
+        }
+
+        /**
+         * Change this module's name.
+         *
+         * @param new_name New module name.
+         */
+        void SetName(std::string new_name) {fName = new_name;}
+
+        /**
+         * Get this module's name.
+         *
+         * @returns The module name. The view refers to the name stored in
+         * this module.
+         */
+        std::string_view GetName() const {return fName;}
+
+        /**
+         * Get the module inputs.
+         *
+         * @returns The inputs for this module.
+         */
+        std::vector<std::string> GetInputs() const {return fInputs;}
+
+        /**
+         * Get the module arguments.
+         *
+         * @returns The arguments for this module (in string format).
+         */
+        std::vector<std::string> GetArgs() const {return fArgs;}
+
+        /**
+         * Get the output of the last call to this module.
+         *
+         * @returns The output of the last call.
+         */
+        const std::vector<float>& GetOutput() const {return fOutput;}
+
+        /**
+         * Get the output shape of the last call to this module.
+         *
+         * @returns The output shape of the last call.
+         */
+        std::vector<int> GetShape() const {return fOutShape;}
+
+        virtual void Forward() = 0;  // Forward method to be implemented by each module.
+
+        virtual std::vector<int> InferShape() = 0;  // Output shape inference to be implemented by each module.
+
+        virtual std::string_view GetOperation() = 0;  // Operation name getter to be implemented by each module.
+
+        virtual void SaveParameters(std::string dir) = 0;  // Parameter saver to be implemented by each module.
+
+        virtual void LoadParameters() = 0;  // Parameter loader to be implemented by each module.
+
+        virtual void LoadParameters(std::map<std::string, std::vector<float>>) = 0;  // Parameter loader to be implemented by each module.
+    protected:
+        std::string fName;  // Module name.
+        std::vector<std::shared_ptr<RModule>> fInputModules;  // Vector of input modules.
+        std::vector<std::string> fInputs;  // Input names.
+        std::vector<std::string> fArgs;  // Other arguments.
+        std::vector<float> fOutput;  // Output of last call.
+        std::vector<int> fOutShape;  // Output shape of last call.
+};
+
+}  // SOFIE.
+}  // Experimental.
+}  // TMVA.
+
+#endif  // TMVA_SOFIE_RMODULE_H_
diff --git a/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Add.hxx b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Add.hxx
new file mode 100644
index 0000000000000..b38c423030075
--- /dev/null
+++ b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Add.hxx
@@ -0,0 +1,97 @@
+// @(#)root/tmva/sofie:$Id$
+// Author: Stefan van Berkum
+
+/**
+ * Addition module.
+*/
+
+#ifndef TMVA_SOFIE_RMODULE_ADD_H_
+#define TMVA_SOFIE_RMODULE_ADD_H_
+
+#include "TMVA/TorchGNN/modules/RModule.hxx"
+// NOTE(review): the target of this include was lost in the reviewed copy of
+// the patch. cblas_saxpy needs a CBLAS header; confirm which one the build
+// provides (e.g., <cblas.h> or <gsl/gsl_cblas.h>).
+#include <gsl/gsl_cblas.h>
+#include <stdexcept>
+#include <string>
+
+namespace TMVA {
+namespace Experimental {
+namespace SOFIE {
+
+class RModule_Add: public RModule {
+    public:
+        /**
+         * Construct the addition module.
+         *
+         * @param a The first argument.
+         * @param b The second argument.
+         */
+        RModule_Add(std::string a, std::string b) {
+            fInputs = {a, b};
+            fArgs = {};
+        }
+
+        /** Destruct the module. */
+        ~RModule_Add() override = default;
+
+        /**
+         * Add the arguments a and b element by element.
+         *
+         * @throws std::invalid_argument If a and b do not hold the same
+         * number of elements.
+         */
+        void Forward() override {
+            const std::vector<float>& a = fInputModules[0] -> GetOutput();
+            const std::vector<float>& b = fInputModules[1] -> GetOutput();
+
+            // saxpy runs over a.size() elements of a buffer sized like b, so
+            // a longer a would write out of bounds.
+            if (a.size() != b.size()) {
+                throw std::invalid_argument("Incompatible inputs in addition layer " + std::string(GetName()) + ". Got " + std::to_string(a.size()) + " and " + std::to_string(b.size()) + " elements.");
+            }
+
+            // fOutput = 1 * a + b.
+            fOutput = b;
+            const int n = static_cast<int>(a.size());
+            cblas_saxpy(n, 1, a.data(), 1, fOutput.data(), 1);
+        }
+
+        /**
+         * Infer the output shape.
+         *
+         * For this module, the output shape is the same as the input shape.
+         *
+         * @returns The output shape.
+         */
+        std::vector<int> InferShape() override {
+            return fInputModules[0] -> GetShape();
+        }
+
+        /**
+         * Get the operation.
+         *
+         * @returns The name of the operation.
+         */
+        std::string_view GetOperation() override {
+            return "Add";
+        }
+
+        /**
+         * Save parameters.
+         *
+         * Does nothing for this module.
+         *
+         * @param dir Save directory.
+         */
+        void SaveParameters([[maybe_unused]] std::string dir) override {}
+
+        /**
+         * Load saved parameters.
+         *
+         * Does nothing for this module.
+         */
+        void LoadParameters() override {}
+
+        /**
+         * Load parameters from PyTorch state dictionary.
+         *
+         * Does nothing for this module.
+         *
+         * @param state_dict The state dictionary.
+         */
+        void LoadParameters([[maybe_unused]] std::map<std::string, std::vector<float>> state_dict) override {}
+};
+
+}  // SOFIE.
+}  // Experimental.
+}  // TMVA.
+
+#endif  // TMVA_SOFIE_RMODULE_ADD_H_
diff --git a/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Cat.hxx b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Cat.hxx
new file mode 100644
index 0000000000000..cb14b1f5f3a1b
--- /dev/null
+++ b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Cat.hxx
@@ -0,0 +1,247 @@
+// @(#)root/tmva/sofie:$Id$
+// Author: Stefan van Berkum
+
+/**
+ * Concatenation module.
+*/
+
+#ifndef TMVA_SOFIE_RMODULE_CAT_H_
+#define TMVA_SOFIE_RMODULE_CAT_H_
+
+#include "TMVA/TorchGNN/modules/RModule.hxx"
+#include <stdexcept>
+
+// TODO: Remove.
+// NOTE(review): the target of this include was lost in the reviewed copy of
+// the patch; <iostream> is assumed. Nothing in this file needs it.
+#include <iostream>
+
+namespace TMVA {
+namespace Experimental {
+namespace SOFIE {
+
+class RModule_Cat: public RModule {
+    public:
+        /**
+         * Construct the concatenation module.
+         *
+         * This will concatenate a and b along the specified dimension. The
+         * inputs need to have the same shape along every axis except the
+         * concatenation dimension.
+         *
+         * @param a The first argument.
+         * @param b The second argument.
+         * @param dim Dimension along which to concatenate. Must be a valid
+         * (non-negative) axis of the inputs; this is checked when the shape is
+         * inferred.
+         */
+        RModule_Cat(std::string a, std::string b, int dim) {
+            fCDim = dim;
+
+            fInputs = {a, b};
+            fArgs = {std::to_string(dim)};
+        }
+
+        /** Destruct the module. */
+        ~RModule_Cat() override = default;
+
+        /**
+         * Concatenate the inputs.
+         *
+         * Relies on the shapes stored by InferShape, which Execute calls
+         * before Forward. In row-major layout, concatenating along dimension
+         * d alternates, for every index combination of the dimensions before
+         * d, one contiguous block of a and one contiguous block of b.
+         *
+         * @throws std::runtime_error If a stored shape has a negative entry or
+         * an input holds fewer elements than its shape requires.
+         */
+        void Forward() override {
+            const std::vector<float>& a = fInputModules[0] -> GetOutput();
+            const std::vector<float>& b = fInputModules[1] -> GetOutput();
+
+            fOutput.clear();
+            fOutput.reserve(a.size() + b.size());
+
+            if (fCDim == 0) {
+                // The concatenation dimension is zero, so just append
+                // everything.
+                fOutput.insert(fOutput.end(), a.begin(), a.end());
+                fOutput.insert(fOutput.end(), b.begin(), b.end());
+                return;
+            }
+
+            // Number of index combinations before the concatenation dimension.
+            std::size_t outer = 1;
+            for (std::size_t i = 0; i < fCDim; i++) {
+                outer *= Extent(fDims[i]);
+            }
+
+            // Number of elements per index of the concatenation dimension.
+            std::size_t inner = 1;
+            for (std::size_t i = fCDim + 1; i < fDims.size(); i++) {
+                inner *= Extent(fDims[i]);
+            }
+
+            // Size of one contiguous block of each input.
+            const std::size_t block_a = Extent(fShapeA[fCDim]) * inner;
+            const std::size_t block_b = Extent(fShapeB[fCDim]) * inner;
+            if (a.size() < outer * block_a || b.size() < outer * block_b) {
+                throw std::runtime_error("Error in concatenation layer.");
+            }
+
+            for (std::size_t i = 0; i < outer; i++) {
+                fOutput.insert(fOutput.end(), a.begin() + i * block_a, a.begin() + (i + 1) * block_a);
+                fOutput.insert(fOutput.end(), b.begin() + i * block_b, b.begin() + (i + 1) * block_b);
+            }
+        }
+
+        /**
+         * Infer the output shape.
+         *
+         * For this module, the output shape is the shape of input a, extended
+         * along the concatenation dimension with input b.
+         *
+         * @returns The output shape.
+         * @throws std::invalid_argument If the inputs have a different number
+         * of dimensions, if the concatenation dimension is not an axis of the
+         * inputs, or if the inputs differ along another axis.
+         */
+        std::vector<int> InferShape() override {
+            fShapeA = fInputModules[0] -> GetShape();
+            fShapeB = fInputModules[1] -> GetShape();
+
+            // Check rank and concatenation dimension before indexing. A
+            // negative dim wraps to a huge value and is rejected here.
+            if (fShapeA.size() != fShapeB.size()) {
+                throw std::invalid_argument("Incompatible shapes in concatenation layer " + std::string(GetName()) + ". Inputs have " + std::to_string(fShapeA.size()) + " and " + std::to_string(fShapeB.size()) + " dimensions.");
+            }
+            if (fCDim >= fShapeA.size()) {
+                throw std::invalid_argument("Invalid concatenation dimension in concatenation layer " + std::string(GetName()) + ". Inputs have " + std::to_string(fShapeA.size()) + " dimensions.");
+            }
+
+            // Check shapes.
+            for (std::size_t i = 0; i < fShapeA.size(); i++) {
+                if (i != fCDim && fShapeA[i] != fShapeB[i]) {
+                    std::vector<int> expected_shape = fShapeA;
+                    expected_shape[fCDim] = -1;
+                    std::string ex_s_str = "[" + std::to_string(expected_shape[0]);
+                    std::string b_s_str = "[" + std::to_string(fShapeB[0]);
+                    for (std::size_t j = 1; j < expected_shape.size(); j++) {
+                        ex_s_str += ", " + std::to_string(expected_shape[j]);
+                        b_s_str += ", " + std::to_string(fShapeB[j]);
+                    }
+                    ex_s_str += "]";
+                    b_s_str += "]";
+                    throw std::invalid_argument("Incompatible shapes in concatenation layer " + std::string(GetName()) + ". Expected shape " + ex_s_str + ", got shape " + b_s_str + ".");
+                }
+            }
+
+            std::vector<int> shape = fShapeA;
+            shape[fCDim] += fShapeB[fCDim];
+            fDims = shape;
+            return shape;
+        }
+
+        /**
+         * Get the operation.
+         *
+         * @returns The name of the operation.
+         */
+        std::string_view GetOperation() override {
+            return "Cat";
+        }
+
+        /**
+         * Save parameters.
+         *
+         * Does nothing for this module.
+         *
+         * @param dir Save directory.
+         */
+        void SaveParameters([[maybe_unused]] std::string dir) override {}
+
+        /**
+         * Load saved parameters.
+         *
+         * Does nothing for this module.
+         */
+        void LoadParameters() override {}
+
+        /**
+         * Load parameters from PyTorch state dictionary.
+         *
+         * Does nothing for this module.
+         *
+         * @param state_dict The state dictionary.
+         */
+        void LoadParameters([[maybe_unused]] std::map<std::string, std::vector<float>> state_dict) override {}
+    private:
+        /**
+         * Convert a shape entry to an element count.
+         *
+         * @param dim The shape entry.
+         * @returns The entry as an unsigned size.
+         * @throws std::runtime_error If the entry is negative.
+         */
+        static std::size_t Extent(int dim) {
+            if (dim < 0) {
+                throw std::runtime_error("Error in concatenation layer.");
+            }
+            return static_cast<std::size_t>(dim);
+        }
+
+        std::size_t fCDim;  // Concatenation dimension.
+        std::vector<int> fShapeA;  // Shape of input a.
+        std::vector<int> fShapeB;  // Shape of input b.
+        std::vector<int> fDims;  // Output dimensions.
+};
+
+}  // SOFIE.
+}  // Experimental.
+}  // TMVA.
+
+#endif  // TMVA_SOFIE_RMODULE_CAT_H_
diff --git a/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_GCNConv.hxx b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_GCNConv.hxx
new file mode 100644
index 0000000000000..90395a30bc494
--- /dev/null
+++ b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_GCNConv.hxx
@@ -0,0 +1,308 @@
+// @(#)root/tmva/sofie:$Id$
+// Author: Stefan van Berkum
+
+/**
+ * Graph convolution module.
+ * + * This module applies the graph convolution operation X = D^(-1/2) * A * + * D^(-1/2) * X * Theta. +*/ + +#ifndef TMVA_SOFIE_RMODULE_GCNCONV_H_ +#define TMVA_SOFIE_RMODULE_GCNCONV_H_ + +#include "TMVA/TorchGNN/modules/RModule.hxx" +#include +#include +#include +#include + +namespace TMVA { +namespace Experimental { +namespace SOFIE { + +class RModule_GCNConv: public RModule { + public: + /** + * Construct the graph convolution module without edge weights. + * + * @param x The input feature matrix of shape (n_nodes, in_features). + * @param edge_index The edge indices matrix of shape (2, n_edges). + * @param in_features The size of each input sample. + * @param out_features The size of each output sample. + * @param improved True if self-loops should have a weight of two (A = A + + * 2I). Defaults to false. + * @param add_self_loops True if self-loops should be added (A = A + I). + * Defaults to true. + * @param normalize True if self-loops should be added and symmetric + * normalization should be computed on the fly. + * @param bias True if a bias is included. Defaults to true. + */ + RModule_GCNConv(std::string x, std::string edge_index, int in_features, int out_features, bool improved=false, bool add_self_loops=true, bool normalize=true, bool bias=true) { + fInputFeatures = in_features; + fOutputFeatures = out_features; + fImprove = improved; + fSelfLoops = add_self_loops; + fNormalization = normalize; + fIncludeBias = bias; + fUseEdgeWeights = false; + + if (!fIncludeBias) { + fB = std::vector(fOutputFeatures); + } + + fInputs = {x, edge_index}; + fArgs = {std::to_string(in_features), std::to_string(out_features), std::to_string(improved), std::to_string(add_self_loops), std::to_string(normalize), std::to_string(bias)}; + } + + /** + * Construct the graph convolution module with edge weights. + * + * @param x The input feature matrix of shape (n_nodes, in_features). + * @param edge_index The edge indices matrix of shape (2, n_edges). 
+ * @param edge_weight The edge weights vector of shape (n_edges). + * @param in_features The size of each input sample. + * @param out_features The size of each output sample. + * @param improved True if self-loops should have a weight of two (A = A + + * 2I). Defaults to false. + * @param add_self_loops True if self-loops should be added (A = A + I). + * Defaults to true. + * @param normalize True if self-loops should be added and symmetric + * normalization should be computed on the fly. + * @param bias True if a bias is included. Defaults to true. + */ + RModule_GCNConv(std::string x, std::string edge_index, std::string edge_weight, int in_features, int out_features, bool improved=false, bool add_self_loops=true, bool normalize=true, bool bias=true) { + fInputFeatures = in_features; + fOutputFeatures = out_features; + fImprove = improved; + fSelfLoops = add_self_loops; + fNormalization = normalize; + fIncludeBias = bias; + fUseEdgeWeights = true; + + if (!fIncludeBias) { + fB = std::vector(fOutputFeatures); + } + + fInputs = {x, edge_index, edge_weight}; + fArgs = {std::to_string(in_features), std::to_string(out_features), std::to_string(improved), std::to_string(add_self_loops), std::to_string(normalize), std::to_string(bias)}; + } + + /** Destruct the module. */ + ~RModule_GCNConv() {}; + + /** + * Applies the graph convolution operation to each node. 
+ */ + void Forward() { + const std::vector& X = fInputModules[0] -> GetOutput(); + const std::vector& edge_index = fInputModules[1] -> GetOutput(); + + std::size_t n_nodes = X.size() / fInputFeatures; + std::size_t n_edges = edge_index.size() / 2; + + std::vector edge_weight; + if (fUseEdgeWeights) { + edge_weight = fInputModules[2] -> GetOutput(); + } else { + edge_weight = std::vector(n_edges, 1); + } + std::vector X_agg; + std::vector degree; + + if (fNormalization) { + if (fSelfLoops) { + if (fImprove) { + degree = std::vector(n_nodes, 2); + } else { + degree = std::vector(n_nodes, 1); + } + } else { + degree = std::vector(n_nodes, 0); + } + + // Loop through edges to get node degrees. + for (std::size_t i = 0; i < n_edges; i++) { + int target = edge_index[i + n_edges]; + degree[target] += edge_weight[i]; + } + } + + if (fNormalization && fSelfLoops) { + // Add self loops. + X_agg = X; + int self_weight; + if (fImprove) { + self_weight = 2; + } else { + self_weight = 1; + } + for (std::size_t i = 0; i < n_nodes; i++) { + for (int j = 0; j < fInputFeatures; j++) { + X_agg[i * fInputFeatures + j] *= self_weight / degree[i]; + } + } + } else { + // Set X_agg to zero. + X_agg = std::vector(X.size()); + } + + // Loop through the edges to aggregate information from neighboring + // nodes. + for (std::size_t i = 0; i < n_edges; i++) { + int source = edge_index[i]; + int target = edge_index[i + n_edges]; + + int x_start = source * fInputFeatures; + int x_agg_start = target * fInputFeatures; + float norm = edge_weight[i] / std::sqrt(degree[source] * degree[target]); + for (int j = 0; j < fInputFeatures; j++) { + if (fNormalization) { + X_agg[x_agg_start + j] += norm * X[x_start + j]; + } else { + X_agg[x_agg_start + j] += edge_weight[i] * X[x_start + j]; + } + } + } + + // Update node features. + // Perform matrix multiplications (Y = X_agg * W^T + 1b^T := + // X_agg W^T + B). + // X_agg, shape (m, k) -> (n_nodes, input_features). 
+ // W, shape (n, k) -> (output_features, input_features). + // B, shape (m, n) -> (n_nodes, output_features). + int m = n_nodes; + int k = fInputFeatures; + int n = fOutputFeatures; + + fOutput = std::vector(n_nodes * fOutputFeatures, 0); // cblas sets (B = X_agg * W^T + B). + if (fIncludeBias) { + // Construct bias matrix (B = 1b^T). + std::vector one(n_nodes, 1); + cblas_sger(CblasRowMajor, n_nodes, fOutputFeatures, 1, one.data(), 1, fB.data(), 1, fOutput.data(), fOutputFeatures); + } + + // Perform matrix multiplication (Y = X_agg * W^T + B). + cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, m, n, k, 1, X_agg.data(), k, fW.data(), k, 1, fOutput.data(), n); + } + + /** + * Infer the output shape. + * + * For this module, the output shape is the same as the shape of the + * input feature matrix but with out_features on the last dimension + * instead of in_features. + * + * @returns The output shape. + */ + std::vector InferShape() { + std::vector shape = fInputModules[0] -> GetShape(); + shape.back() = fOutputFeatures; + return shape; + } + + /** + * Get the operation. + * + * @returns The name of the operation. + */ + std::string_view GetOperation() { + return "GCNConv"; + } + + /** + * Set the weights. + * + * @param weights The weight matrix. + */ + void SetWeights(std::vector weights) {fW = weights;} + + /** + * Set the biases. + * + * @param biases The bias vector. + */ + void SetBiases(std::vector biases) {fB = biases;} + + /** + * Save parameters. + * + * @param dir Save directory. + */ + void SaveParameters(std::string dir) { + // Save weights. + std::string fdir = dir + "/" + fName + "_lin_weight.dat"; + std::ofstream outfile = std::ofstream(fdir, std::ios::out | std::ios::binary); + outfile.write(reinterpret_cast(&fW[0]), fW.size() * sizeof(float)); + + if (fIncludeBias) { + // Save biases. 
+ fdir = dir + "/" + fName + "_bias.dat"; + outfile = std::ofstream(fdir, std::ios::out | std::ios::binary); + outfile.write(reinterpret_cast(&fB[0]), fB.size() * sizeof(float)); + } + outfile.close(); + } + + /** + * Load saved parameters. + */ + void LoadParameters() { + std::string dir = __FILE__; + std::string del_string = "inc/modules/RModule_GCNConv.hxx"; + dir.replace(dir.find(del_string), del_string.size(), "params/"); + + // Load weights. + std::string param_dir = dir + fName + "_lin_weight.dat"; + std::ifstream infile = std::ifstream(param_dir, std::ios::in | std::ios::binary); + fW = std::vector(fInputFeatures * fOutputFeatures); + infile.read(reinterpret_cast(&fW[0]), fW.size() * sizeof(float)); + infile.close(); + + if (fIncludeBias) { + // Load biases. + param_dir = dir + fName + "_bias.dat"; + infile = std::ifstream(param_dir, std::ios::in | std::ios::binary); + fB = std::vector(fOutputFeatures); + infile.read(reinterpret_cast(&fB[0]), fB.size() * sizeof(float)); + infile.close(); + } + } + + /** + * Load parameters from PyTorch state dictionary. + * + * @param state_dict The state dictionary. + */ + void LoadParameters(std::map> state_dict) { + if (auto search = state_dict.find(fName + ".lin.weight"); search != state_dict.end()) { + fW = state_dict[fName + ".lin.weight"]; + } else { + std::cout << "WARNING: Weights for module " << fName << " not found." << std::endl; + } + + if (fIncludeBias) { + if (auto search = state_dict.find(fName + ".bias"); search != state_dict.end()) { + fB = state_dict[fName + ".bias"]; + } else { + std::cout << "WARNING: Biases for module " << fName << " not found." << std::endl; + } + } + } + private: + int fInputFeatures; // The size of each input sample. + int fOutputFeatures; // The size of each output sample. + bool fUseEdgeWeights; // True if edge weights are provided. + bool fImprove; // True if self-loops should get a weight of two. + bool fSelfLoops; // True if self-loops should be added. 
+ bool fNormalization; // True if edge weights should be normalized. + bool fIncludeBias; // True if a bias is included. + std::vector fW; // Weight matrix W. + std::vector fB; // Bias vector b. +}; + +} // TMVA. +} // Experimental. +} // SOFIE. + +#endif // TMVA_SOFIE_RMODULE_GCNCONV_H_ diff --git a/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_GlobalMeanPool.hxx b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_GlobalMeanPool.hxx new file mode 100644 index 0000000000000..bf20e35664950 --- /dev/null +++ b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_GlobalMeanPool.hxx @@ -0,0 +1,130 @@ +// @(#)root/tmva/sofie:$Id$ +// Author: Stefan van Berkum + +/** + * Global mean pooling module. +*/ + +#ifndef TMVA_SOFIE_RMODULE_GLOBALMEANPOOL_H_ +#define TMVA_SOFIE_RMODULE_GLOBALMEANPOOL_H_ + +#include "TMVA/TorchGNN/modules/RModule.hxx" +#include +#include + +namespace TMVA { +namespace Experimental { +namespace SOFIE { + +class RModule_GlobalMeanPool: public RModule { + public: + /** + * Construct the global mean pooling module. + * + * @param X A node feature matrix of shape (N_1 + ... + N_B, + * n_features), where N_i denotes the number of nodes from graph i. + * @param batch The batch vector which assigns each node in x to a + * specific graph. + */ + RModule_GlobalMeanPool(std::string X, std::string batch) { + fInputs = {X, batch}; + fArgs = {}; + } + + /** Destruct the module. */ + ~RModule_GlobalMeanPool() {}; + + /** + * Apply the global mean pooling operation. + */ + void Forward() { + const std::vector& x = fInputModules[0] -> GetOutput(); + const std::vector& batch_float = fInputModules[1] -> GetOutput(); + std::vector batch(batch_float.begin(), batch_float.end()); + + int n_unique = fOutShape[0]; + int n_features = fOutShape[1]; + + fOutput = std::vector(n_unique * n_features); + + // TODO: This approach might lead to overflow for large feature + // values or many nodes. Should we work in logs or use Welford's + // online algorithm? 
+ + // Sum all entries belonging to same graph. + std::vector counts(n_unique); + for (std::size_t i = 0; i < batch.size(); i++) { + int x_start = i * n_features; + int out_start = batch[i] * n_features; + + for (int j = 0; j < n_features; j++) { + fOutput[out_start + j] += x[x_start + j]; + } + counts[batch[i]]++; + } + + // Divide all features in a graph by the corresponding number of nodes. + for (int i = 0; i < n_unique; i++) { + int out_start = i * n_features; + for (int j = 0; j < n_features; j++) { + fOutput[out_start + j] /= counts[i]; + } + } + } + + /** + * Infer the output shape. + * + * For this module, the output shape is (n_unique, n_features), where + * n_unique denotes the number of graphs in the batch. + * + * @returns The output shape. + */ + std::vector InferShape() { + std::vector batch_float = fInputModules[1] -> GetOutput(); + + std::vector shape = fInputModules[0] -> GetShape(); + shape[0] = std::set(batch_float.begin(), batch_float.end()).size(); + return shape; + } + + /** + * Get the operation. + * + * @returns The name of the operation. + */ + std::string_view GetOperation() { + return "GlobalMeanPool"; + } + + /** + * Save parameters. + * + * Does nothing for this module. + * + * @param dir Save directory. + */ + void SaveParameters([[maybe_unused]] std::string dir) {} + + /** + * Load saved parameters. + * + * Does nothing for this module. + */ + void LoadParameters() {} + + /** + * Load parameters from PyTorch state dictionary. + * + * Does nothing for this module. + * + * @param state_dict The state dictionary. + */ + void LoadParameters([[maybe_unused]] std::map> state_dict) {} +}; + +} // TMVA. +} // Experimental. +} // SOFIE. 
+ +#endif // TMVA_SOFIE_RMODULE_GLOBALMEANPOOL_H_ diff --git a/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Input.hxx b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Input.hxx new file mode 100644 index 0000000000000..b5eff30123be6 --- /dev/null +++ b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Input.hxx @@ -0,0 +1,116 @@ +// @(#)root/tmva/sofie:$Id$ +// Author: Stefan van Berkum + +/** + * Input module. + * + * Used internally. +*/ + +#ifndef TMVA_SOFIE_RMODULE_INPUT_H_ +#define TMVA_SOFIE_RMODULE_INPUT_H_ + +#include "TMVA/TorchGNN/modules/RModule.hxx" +#include + +namespace TMVA { +namespace Experimental { +namespace SOFIE { + +class RModule_Input: public RModule { + public: + /** + * Construct the input module. + * + * The module stores the input and its shape so that child modules can access it. + * + * @param input_shape The shape of the input. + */ + RModule_Input(std::vector input_shape) { + fInShape = input_shape; + fWildcard = std::find(input_shape.begin(), input_shape.end(), -1) - input_shape.begin(); + + fInputs = {}; // No previous inputs to this module. + fArgs = {}; + } + + /** Destruct the module. */ + ~RModule_Input() {}; + + /** + * Assign input. + * + * @param input The input. + */ + void SetParams(std::vector input) { + fOutput = input; + } + + /** + * Does nothing for this module. + */ + void Forward() {} + + /** + * Infer the output shape. + * + * For this module, the output shape is the same as the input shape + * with an inferred value for the wildcard dimension (if there is one). + * + * @returns The output shape. + */ + std::vector InferShape() { + int cprod = 1; + for (std::size_t i = 0; i < fInShape.size(); i++) { + if (i != fWildcard) { + cprod *= fInShape[i]; + } + } + std::vector shape = fInShape; + if (fWildcard < shape.size()) { shape[fWildcard] = fOutput.size() / cprod; } // Without a wildcard, fWildcard equals the number of dimensions. + return shape; + } + + /** + * Get the operation. 
+ * + * @returns The name of the operation. + */ + std::string_view GetOperation() { + return "Input"; + } + + /** + * Save parameters.
+ * + * Does nothing for this module. + * + * @param dir Save directory. + */ + void SaveParameters([[maybe_unused]] std::string dir) {} + + /** + * Load saved parameters. + * + * Does nothing for this module. + */ + void LoadParameters() {} + + /** + * Load parameters from PyTorch state dictionary. + * + * Does nothing for this module. + * + * @param state_dict The state dictionary. + */ + void LoadParameters([[maybe_unused]] std::map> state_dict) {} + private: + std::size_t fWildcard; // Index of the wildcard dimension. + std::vector fInShape; // Input shape. +}; + +} // TMVA. +} // Experimental. +} // SOFIE. + +#endif // TMVA_SOFIE_RMODULE_INPUT_H_ diff --git a/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Linear.hxx b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Linear.hxx new file mode 100644 index 0000000000000..be9312e42b7cf --- /dev/null +++ b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Linear.hxx @@ -0,0 +1,231 @@ +// @(#)root/tmva/sofie:$Id$ +// Author: Stefan van Berkum + +/** + * Linear module. + * + * This module applies a linear transformation (Ax + b) to the data. For + * matrix inputs of shape (n_obs, n_features), it applies the transformation + * (XA^T + 1b^T). +*/ + +#ifndef TMVA_SOFIE_RMODULE_LINEAR_H_ +#define TMVA_SOFIE_RMODULE_LINEAR_H_ + +#include "TMVA/TorchGNN/modules/RModule.hxx" +#include +#include +#include + +namespace TMVA { +namespace Experimental { +namespace SOFIE { + +class RModule_Linear: public RModule { + public: + /** + * Construct the linear module. + * + * @param x The input. + * @param in_features The size of each input sample. + * @param out_features The size of each output sample. + * @param bias True if a bias is included. Defaults to true. 
+ */ + RModule_Linear(std::string x, int in_features, int out_features, bool bias=true) { + fInputFeatures = in_features; + fOutputFeatures = out_features; + fIncludeBias = bias; + + if (!fIncludeBias) { + fB = std::vector(fOutputFeatures); + } + + fInputs = {x}; + fArgs = {std::to_string(in_features), std::to_string(out_features), std::to_string(bias)}; + } + + /** Destruct the module. */ + ~RModule_Linear() {}; + + /** + * Applies the linear transformation (y = Ax + b) to each element in the + * input. + */ + void Forward() { + const std::vector& in = fInputModules[0] -> GetOutput(); + + + if (fRowDim > 1) { + // Perform matrix multiplications (Y = XA^T + 1b^T := XA^T + B). + // X, shape (m, k) -> (row_dim, input_features). + // A, shape (n, k) -> (output_features, input_features). + // B, shape (m, n) -> (row_dim, output_features). + int m = fRowDim; + int k = fInputFeatures; + int n = fOutputFeatures; + fOutput.assign(in.size() / fInputFeatures * fOutputFeatures, 0); // One zeroed slot per output element; cblas sets (B = XA^T + B). + for (std::size_t i = 0; i < in.size(); i += fRowDim * fInputFeatures) { + // Get input matrix and the output slice that belongs to it. + std::vector X(in.begin() + i, in.begin() + i + fRowDim * fInputFeatures); + float* Y = fOutput.data() + (i / fInputFeatures) * fOutputFeatures; // Each matrix writes its own rows instead of overwriting the previous result. + if (fIncludeBias) { + // Construct bias matrix (B = 1b^T). + std::vector one(fRowDim, 1); + cblas_sger(CblasRowMajor, fRowDim, fOutputFeatures, 1, one.data(), 1, fB.data(), 1, Y, fOutputFeatures); + } + + // Perform matrix multiplication (Y = XA^T + B). + cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, m, n, k, 1, X.data(), k, fA.data(), k, 1, Y, n); + } + } else { + fOutput.clear(); + fOutput.reserve(fNumOut); + + // Perform matrix-vector multiplications (y = Ax + b). + // A, shape (n, m) -> (output_features, input_features). 
+ // x, shape (m, 1) -> (input_features, 1). + // b, shape (n, 1) -> (output_features, 1). + int m = fInputFeatures; + int n = fOutputFeatures; + for (std::size_t i = 0; i < in.size(); i += fInputFeatures) { + // Get input vector.
+ std::vector x(in.begin() + i, in.begin() + i + fInputFeatures); + + // Copy bias. + std::vector y(fB); + + // Perform matrix-vector multiplication (y = Ax + b). + cblas_sgemv(CblasRowMajor, CblasNoTrans, n, m, 1, fA.data(), m, x.data(), 1, 1, y.data(), 1); + for (float elem: y) { // cblas sets (b = Ax + b). + fOutput.push_back(elem); + } + } + } + } + + /** + * Infer the output shape. + * + * For this module, the output shape is the same as the input shape but + * with out_features on the last dimension instead of in_features. + * + * @returns The output shape. + */ + std::vector InferShape() { + std::vector shape = fInputModules[0] -> GetShape(); + shape.back() = fOutputFeatures; + + if (shape.size() > 1) { + fRowDim = shape[shape.size() - 2]; + } + + fNumOut = 1; + for (int dim: shape) { + fNumOut *= dim; + } + return shape; + } + + /** + * Get the operation. + * + * @returns The name of the operation. + */ + std::string_view GetOperation() { + return "Linear"; + } + + /** + * Set the weights. + * + * @param weights The weight matrix. + */ + void SetWeights(std::vector weights) {fA = weights;} + + /** + * Set the biases. + * + * @param biases The bias vector. + */ + void SetBiases(std::vector biases) {fB = biases;} + + /** + * Save parameters. + * + * @param dir Save directory. + */ + void SaveParameters(std::string dir) { + // Save weights. + std::string fdir = dir + "/" + fName + "_weight.dat"; + std::ofstream outfile = std::ofstream(fdir, std::ios::out | std::ios::binary); + outfile.write(reinterpret_cast(&fA[0]), fA.size() * sizeof(float)); + + if (fIncludeBias) { + // Save biases. + fdir = dir + "/" + fName + "_bias.dat"; + outfile = std::ofstream(fdir, std::ios::out | std::ios::binary); + outfile.write(reinterpret_cast(&fB[0]), fB.size() * sizeof(float)); + } + outfile.close(); + } + + /** + * Load saved parameters. 
+ */ + void LoadParameters() { + std::string dir = __FILE__; + std::string del_string = "inc/modules/RModule_Linear.hxx"; + dir.replace(dir.find(del_string), del_string.size(), "params/"); + + // Load weights. + std::string param_dir = dir + fName + "_weight.dat"; + std::ifstream infile = std::ifstream(param_dir, std::ios::in | std::ios::binary); + fA = std::vector(fInputFeatures * fOutputFeatures); + infile.read(reinterpret_cast(&fA[0]), fA.size() * sizeof(float)); + infile.close(); + + if (fIncludeBias) { + // Load biases. + param_dir = dir + fName + "_bias.dat"; + infile = std::ifstream(param_dir, std::ios::in | std::ios::binary); + fB = std::vector(fOutputFeatures); + infile.read(reinterpret_cast(&fB[0]), fB.size() * sizeof(float)); + infile.close(); + } + } + + /** + * Load parameters from PyTorch state dictionary. + * + * @param state_dict The state dictionary. + */ + void LoadParameters(std::map> state_dict) { + if (auto search = state_dict.find(fName + ".weight"); search != state_dict.end()) { + fA = state_dict[fName + ".weight"]; + } else { + std::cout << "WARNING: Weights for module " << fName << " not found." << std::endl; + } + + if (fIncludeBias) { + if (auto search = state_dict.find(fName + ".bias"); search != state_dict.end()) { + fB = state_dict[fName + ".bias"]; + } else { + std::cout << "WARNING: Biases for module " << fName << " not found." << std::endl; + } + } + } + private: + int fInputFeatures; // The size of each input sample. + int fOutputFeatures; // The size of each output sample. + bool fIncludeBias; // True if a bias is included. + int fRowDim = 1; // Size of the second to last input dimension. + int fNumOut; // The number of output elements. + std::vector fA; // Weight matrix A. + std::vector fB; // Bias vector b. +}; + +} // TMVA. +} // Experimental. +} // SOFIE. 
+ +#endif // TMVA_SOFIE_RMODULE_LINEAR_H_ diff --git a/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_ReLU.hxx b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_ReLU.hxx new file mode 100644 index 0000000000000..a0c1b2792b274 --- /dev/null +++ b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_ReLU.hxx @@ -0,0 +1,98 @@ +// @(#)root/tmva/sofie:$Id$ +// Author: Stefan van Berkum + +/** + * ReLU module. +*/ + +#ifndef TMVA_SOFIE_RMODULE_RELU_H_ +#define TMVA_SOFIE_RMODULE_RELU_H_ + +#include "TMVA/TorchGNN/modules/RModule.hxx" + +namespace TMVA { +namespace Experimental { +namespace SOFIE { + +class RModule_ReLU: public RModule { + public: + /** + * Construct the ReLU module. + * + * The ReLU operation will be applied element-wise. + * + * @param x The input. + */ + RModule_ReLU(std::string x) { + fInputs = {x}; + fArgs = {}; + } + + /** Destruct the module. */ + ~RModule_ReLU() {}; + + /** + * Apply the ReLU operation max(0, x) to every element of the input. + */ + void Forward() { + const std::vector& x = fInputModules[0] -> GetOutput(); + std::size_t n = x.size(); + fOutput.resize(n); + + for (std::size_t i = 0; i < n; i++) { + fOutput[i] = (x[i] < 0) ? 0 : x[i]; + } + } + + /** + * Infer the output shape. + * + * For this module, the output shape is the same as the input shape. + * + * @returns The output shape. + */ + std::vector InferShape() { + std::vector shape = fInputModules[0] -> GetShape(); + return shape; + } + + /** + * Get the operation. + * + * @returns The name of the operation. + */ + std::string_view GetOperation() { + return "ReLU"; + } + + /** + * Save parameters. + * + * Does nothing for this module. + * + * @param dir Save directory. + */ + void SaveParameters([[maybe_unused]] std::string dir) {} + + /** + * Load saved parameters. + * + * Does nothing for this module. + */ + void LoadParameters() {} + + /** + * Load parameters from PyTorch state dictionary. + * + * Does nothing for this module. 
+ * + * @param state_dict The state dictionary.
+ */ + void LoadParameters([[maybe_unused]] std::map> state_dict) {} +}; + +} // SOFIE. +} // Experimental. +} // TMVA. + +#endif // TMVA_SOFIE_RMODULE_RELU_H_ diff --git a/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Reshape.hxx b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Reshape.hxx new file mode 100644 index 0000000000000..c7d10d5c74a13 --- /dev/null +++ b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Reshape.hxx @@ -0,0 +1,136 @@ +// @(#)root/tmva/sofie:$Id$ +// Author: Stefan van Berkum + +/** + * Reshape module. +*/ + +#ifndef TMVA_SOFIE_RMODULE_RESHAPE_H_ +#define TMVA_SOFIE_RMODULE_RESHAPE_H_ + +#include "TMVA/TorchGNN/modules/RModule.hxx" +#include +#include + +namespace TMVA { +namespace Experimental { +namespace SOFIE { + +class RModule_Reshape: public RModule { + public: + /** + * Construct the reshape module. + * + * Accepts one wildcard dimension (-1). Throws std::invalid_argument for a zero dimension, an entry below -1, or more than one wildcard. + * + * @param x The input. + * @param shape The desired shape. + */ + RModule_Reshape(std::string x, std::vector shape) { + fOutShape = shape; + fWildcard = std::find(shape.begin(), shape.end(), -1) - shape.begin(); + + // Check shape. + if (std::any_of(shape.begin(), shape.end(), [](int i){return i == 0;})) { + throw std::invalid_argument("Dimension cannot be zero."); + } + if (std::any_of(shape.begin(), shape.end(), [](int i){return i < -1;})) { + throw std::invalid_argument("Shape cannot have negative entries (except for the wildcard dimension)."); + } + if (std::count(shape.begin(), shape.end(), -1) > 1) { + throw std::invalid_argument("Shape may have at most one wildcard."); + } + + // Translate shape argument to string. + std::string shape_arg = "{"; + bool first = true; + for (int i: shape) { + if (!first) { + shape_arg += ", "; + } else { + first = false; + } + shape_arg += std::to_string(i); // Append the decimal text; "+= i" would append the character with code i. 
+ } + shape_arg += "}"; + + fInputs = {x}; + fArgs = {shape_arg}; + } + + /** Destruct the module. */ + ~RModule_Reshape() {}; + + /** + * Simply forward the input.
+ * + * Reshaping is done through the InferShape() method. + */ + void Forward() { + const std::vector& x = fInputModules[0] -> GetOutput(); + fOutput = x; + } + + /** + * Infer the output shape. + * + * For this module, the output shape is given by the user; a wildcard dimension, if present, is inferred from the input size. + * + * @returns The output shape. + */ + std::vector InferShape() { + int cprod = 1; + for (std::size_t i = 0; i < fOutShape.size(); i++) { + if (i != fWildcard) { + cprod *= fOutShape[i]; + } + } + std::vector shape = fOutShape; + // Without a wildcard, fWildcard equals the number of dimensions and the shape is returned as given. + if (fWildcard < shape.size()) { shape[fWildcard] = fInputModules[0] -> GetOutput().size() / cprod; } + return shape; + } + + /** + * Get the operation. + * + * @returns The name of the operation. + */ + std::string_view GetOperation() { + return "Reshape"; + } + + /** + * Save parameters. + * + * Does nothing for this module. + * + * @param dir Save directory. + */ + void SaveParameters([[maybe_unused]] std::string dir) {} + + /** + * Load saved parameters. + * + * Does nothing for this module. + */ + void LoadParameters() {} + + /** + * Load parameters from PyTorch state dictionary. + * + * Does nothing for this module. + * + * @param state_dict The state dictionary. + */ + void LoadParameters([[maybe_unused]] std::map> state_dict) {} + private: + std::size_t fWildcard; // Index of the wildcard dimension. + std::vector fOutShape; // Desired output shape. +}; + +} // SOFIE. +} // Experimental. +} // TMVA. 
+ +#endif // TMVA_SOFIE_RMODULE_RESHAPE_H_ diff --git a/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Softmax.hxx b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Softmax.hxx new file mode 100644 index 0000000000000..a44e91ffa7bb5 --- /dev/null +++ b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_Softmax.hxx @@ -0,0 +1,107 @@ +// @(#)root/tmva/sofie:$Id$ +// Author: Stefan van Berkum + +/** + * Softmax module.
+*/ + +#ifndef TMVA_SOFIE_RMODULE_SOFTMAX_H_ +#define TMVA_SOFIE_RMODULE_SOFTMAX_H_ + +#include "TMVA/TorchGNN/modules/RModule.hxx" +#include + +namespace TMVA { +namespace Experimental { +namespace SOFIE { + +class RModule_Softmax: public RModule { + public: + /** + * Construct the softmax module. + * + * @param x The input. + */ + RModule_Softmax(std::string x) { + fInputs = {x}; + fArgs = {}; + } + + /** Destruct the module. */ + ~RModule_Softmax() {}; + + /** + * Apply the softmax operation exp(x_i) / sum(exp(x_j)). + * + * The sum is taken over the last dimension. + */ + void Forward() { + const std::vector& x = fInputModules[0] -> GetOutput(); + int last_dim = fInputModules[0] -> GetShape().back(); + fOutput = x; + + for (std::size_t i = 0; i < x.size(); i += last_dim) { + // Exponentials are stored directly in fOutput; a variable-length array is not standard C++. + float sum = 0; + for (std::size_t j = i; j < i + last_dim; j++) { + fOutput[j] = std::exp(x[j]); + sum += fOutput[j]; + } + for (std::size_t j = i; j < i + last_dim; j++) { + fOutput[j] /= sum; + } + } + } + + /** + * Infer the output shape. + * + * For this module, the output shape is the same as the input shape. + * + * @returns The output shape. + */ + std::vector InferShape() { + std::vector shape = fInputModules[0] -> GetShape(); + return shape; + } + + /** + * Get the operation. + * + * @returns The name of the operation. + */ + std::string_view GetOperation() { + return "Softmax"; + } + + /** + * Save parameters. + * + * Does nothing for this module. + * + * @param dir Save directory. + */ + void SaveParameters([[maybe_unused]] std::string dir) {} + + /** + * Load saved parameters. + * + * Does nothing for this module. + */ + void LoadParameters() {} + + /** + * Load parameters from PyTorch state dictionary. + * + * Does nothing for this module. + * + * @param state_dict The state dictionary. 
+ */ + void LoadParameters([[maybe_unused]] std::map> state_dict) {} +}; + +} // SOFIE. +} // Experimental. +} // TMVA.
+ +#endif // TMVA_SOFIE_RMODULE_SOFTMAX_H_ diff --git a/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_SparseGCNConv.hxx b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_SparseGCNConv.hxx new file mode 100644 index 0000000000000..1d59ef82122f6 --- /dev/null +++ b/tmva/sofie/inc/TMVA/TorchGNN/modules/RModule_SparseGCNConv.hxx @@ -0,0 +1,285 @@ +// @(#)root/tmva/sofie:$Id$ +// Author: Stefan van Berkum + +/** + * Graph convolution module using Eigen for sparse operations. + * + * This module applies the graph convolution operation X = D^(-1/2) * A * + * D^(-1/2) * X * Theta. + * + * Its performance is currently worse than the regular GCNConv module, so it is + * not made available to users. +*/ + +#ifndef RMODULE_SPARSEGCNCONV_H_ +#define RMODULE_SPARSEGCNCONV_H_ + +#include "TMVA/TorchGNN/modules/RModule.hxx" +#include +#include +#include +#include + +namespace TMVA { +namespace Experimental { +namespace SOFIE { + +class RModule_SparseGCNConv: public RModule { + public: + /** + * Construct the graph convolution module without edge weights. + * + * @param x The input feature matrix of shape (n_nodes, in_features). + * @param edge_index The edge indices matrix of shape (2, n_edges). + * @param in_features The size of each input sample. + * @param out_features The size of each output sample. + * @param improved True if self-loops should have a weight of two (A = A + + * 2I). Defaults to false. + * @param add_self_loops True if self-loops should be added (A = A + I). + * Defaults to true. + * @param normalize True if self-loops should be added and symmetric + * normalization should be computed on the fly. + * @param bias True if a bias is included. Defaults to true. 
+ */ + RModule_SparseGCNConv(std::string x, std::string edge_index, int in_features, int out_features, bool improved=false, bool add_self_loops=true, bool normalize=true, bool bias=true) { + fInputFeatures = in_features; + fOutputFeatures = out_features; + fImprove = improved; + fSelfLoops = add_self_loops; + fNormalization = normalize; + fIncludeBias = bias; + fEdgeWeights = false; + + if (!fIncludeBias) { + fB = std::vector(fOutputFeatures); + } + + fInputs = {x, edge_index}; + fArgs = {std::to_string(in_features), std::to_string(out_features), std::to_string(improved), std::to_string(add_self_loops), std::to_string(normalize), std::to_string(bias)}; + } + + /** + * Construct the graph convolution module with edge weights. + * + * @param x The input feature matrix of shape (n_nodes, in_features). + * @param edge_index The edge indices matrix of shape (2, n_edges). + * @param edge_weight The edge weights vector of shape (n_edges). + * @param in_features The size of each input sample. + * @param out_features The size of each output sample. + * @param improved True if self-loops should have a weight of two (A = A + + * 2I). Defaults to false. + * @param add_self_loops True if self-loops should be added (A = A + I). + * Defaults to true. + * @param normalize True if self-loops should be added and symmetric + * normalization should be computed on the fly. + * @param bias True if a bias is included. Defaults to true. 
+ */ + RModule_SparseGCNConv(std::string x, std::string edge_index, std::string edge_weight, int in_features, int out_features, bool improved=false, bool add_self_loops=true, bool normalize=true, bool bias=true) { + fInputFeatures = in_features; + fOutputFeatures = out_features; + fImprove = improved; + fSelfLoops = add_self_loops; + fNormalization = normalize; + fIncludeBias = bias; + fEdgeWeights = true; + + if (!fIncludeBias) { + fB = std::vector(fOutputFeatures); + } + + fInputs = {x, edge_index, edge_weight}; + fArgs = {std::to_string(in_features), std::to_string(out_features), std::to_string(improved), std::to_string(add_self_loops), std::to_string(normalize), std::to_string(bias)}; + } + + /** Destruct the module. */ + ~RModule_SparseGCNConv() {}; + + /** + * Applies the graph convolution operation to each node. + * + * The updated feature matrix is stored in fOutput. + */ + void Forward() { + const std::vector& X_const = fInputModules[0] -> GetOutput(); + std::vector X = X_const; + const std::vector& edge_index_f = fInputModules[1] -> GetOutput(); + + std::size_t n_nodes = X.size() / fInputFeatures; + std::size_t n_edges = edge_index_f.size() / 2; + + std::vector edge_weight; + if (fEdgeWeights) { + edge_weight = fInputModules[2] -> GetOutput(); // Copy the provided weights; they are indexed per edge below. + } else { + edge_weight = std::vector(n_edges, 1); + } + std::vector degree; + + typedef Eigen::Triplet T; + std::vector edge_list; + edge_list.reserve(n_edges); + for(std::size_t i = 0; i < n_edges; i++) { + int source = edge_index_f[i]; + int target = edge_index_f[i + n_edges]; + edge_list.push_back(T(source, target, edge_weight[i])); + } + Eigen::SparseMatrix A(n_nodes, n_nodes); + A.setFromTriplets(edge_list.begin(), edge_list.end()); + + typedef Eigen::Matrix rMatrix; + if (fNormalization) { + if (fSelfLoops) { + if (fImprove) { + A = A + 2 * rMatrix::Identity(n_nodes, n_nodes); + degree = std::vector(n_nodes, 2); + } else { + A = A 
+ rMatrix::Identity(n_nodes, n_nodes); + degree =
std::vector(n_nodes, 1); + } + } else { + degree = std::vector(n_nodes, 0); + } + + // Loop through edges to get node degrees. + for (std::size_t i = 0; i < n_edges; i++) { + int target = edge_index_f[i + n_edges]; + degree[target] += edge_weight[i]; + } + } + + Eigen::Map d(degree.data(), n_nodes); + d = d.array().sqrt().inverse(); + auto D = d.asDiagonal(); + A = D * A * D; + + Eigen::Map X_m(X.data(), n_nodes, fInputFeatures); + Eigen::Map theta(fW.data(), fOutputFeatures, fInputFeatures); + rMatrix out = A * X_m * theta.transpose(); + + if (fIncludeBias) { + Eigen::Map bias(fB.data(), fOutputFeatures); + out = out.rowwise() + bias; + } + fOutput = std::vector(out.data(), out.data() + n_nodes * fOutputFeatures); + } + + /** + * Infer the output shape. + * + * For this module, the output shape is the same as the shape of the + * input feature matrix but with out_features on the last dimension + * instead of in_features. + * + * @returns The output shape. + */ + std::vector InferShape() { + std::vector shape = fInputModules[0] -> GetShape(); + shape.back() = fOutputFeatures; + return shape; + } + + /** + * Get the operation. + * + * @returns The name of the operation. + */ + std::string_view GetOperation() { + return "SparseGCNConv"; + } + + /** + * Set the weights. + * + * @param weights The weight matrix. + */ + void SetWeights(std::vector weights) {fW = weights;} + + /** + * Set the biases. + * + * @param biases The bias vector. + */ + void SetBiases(std::vector biases) {fB = biases;} + + /** + * Save parameters. + * + * @param dir Save directory. + */ + void SaveParameters(std::string dir) { + // Save weights. + std::string fdir = dir + "/" + fName + "_lin_weight.dat"; + std::ofstream outfile = std::ofstream(fdir, std::ios::out | std::ios::binary); + outfile.write(reinterpret_cast(&fW[0]), fW.size() * sizeof(float)); + + if (fIncludeBias) { + // Save biases. 
+ fdir = dir + "/" + fName + "_bias.dat"; + outfile = std::ofstream(fdir, std::ios::out | std::ios::binary); + outfile.write(reinterpret_cast(&fB[0]), fB.size() * sizeof(float)); + } + outfile.close(); + } + + /** + * Load saved parameters. + */ + void LoadParameters() { + std::string dir = __FILE__; + std::string del_string = "inc/modules/RModule_SparseGCNConv.hxx"; + dir.replace(dir.find(del_string), del_string.size(), "params/"); + + // Load weights. + std::string param_dir = dir + fName + "_lin_weight.dat"; + std::ifstream infile = std::ifstream(param_dir, std::ios::in | std::ios::binary); + fW = std::vector(fInputFeatures * fOutputFeatures); + infile.read(reinterpret_cast(&fW[0]), fW.size() * sizeof(float)); + infile.close(); + + if (fIncludeBias) { + // Load biases. + param_dir = dir + fName + "_bias.dat"; + infile = std::ifstream(param_dir, std::ios::in | std::ios::binary); + fB = std::vector(fOutputFeatures); + infile.read(reinterpret_cast(&fB[0]), fB.size() * sizeof(float)); + infile.close(); + } + } + + /** + * Load parameters from PyTorch state dictionary. + * + * @param state_dict The state dictionary. + */ + void LoadParameters(std::map> state_dict) { + if (auto search = state_dict.find(fName + ".lin.weight"); search != state_dict.end()) { + fW = state_dict[fName + ".lin.weight"]; + } else { + std::cout << "WARNING: Weights for module " << fName << " not found." << std::endl; + } + + if (fIncludeBias) { + if (auto search = state_dict.find(fName + ".bias"); search != state_dict.end()) { + fB = state_dict[fName + ".bias"]; + } else { + std::cout << "WARNING: Biases for module " << fName << " not found." << std::endl; + } + } + } + private: + int fInputFeatures; // The size of each input sample. + int fOutputFeatures; // The size of each output sample. + bool fEdgeWeights; // True if edge weights are provided. + bool fImprove; // True if self-loops should get a weight of two. + bool fSelfLoops; // True if self-loops should be added. 
+        bool fNormalization; // True if edge weights should be normalized.
+        bool fIncludeBias; // True if a bias is included.
+        std::vector<float> fW; // Weight matrix W.
+        std::vector<float> fB; // Bias vector b.
+};
+
+} // SOFIE.
+} // Experimental.
+} // TMVA.
+
+#endif // RMODULE_SPARSEGCNCONV_H_
diff --git a/tmva/sofie/src/TorchGNN/RModel_TorchGNN.cxx b/tmva/sofie/src/TorchGNN/RModel_TorchGNN.cxx
new file mode 100644
index 0000000000000..2765cabd6d698
--- /dev/null
+++ b/tmva/sofie/src/TorchGNN/RModel_TorchGNN.cxx
@@ -0,0 +1,294 @@
+// @(#)root/tmva/sofie:$Id$
+// Author: Stefan van Berkum
+
+/**
+ * Source file for PyTorch Geometric models.
+ *
+ * Models are created by the user and parameters can then be loaded into each layer.
+ *
+ * IMPORTANT: Changes to the format (e.g., namespaces) may affect the emit
+ * defined in RModel_TorchGNN.cxx (save).
+*/
+
+#include "TMVA/TorchGNN/RModel_TorchGNN.hxx"
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <set>
+#include <string>
+#include <string_view>
+#include <vector>
+
+namespace TMVA {
+namespace Experimental {
+namespace SOFIE {
+
+/**
+ * Save the model as standalone inference code.
+ *
+ * The package is written to <path>/<name>. If that directory already exists
+ * and overwrite is false, a warning is printed and nothing is written.
+ *
+ * @param path Path to save location.
+ * @param name Model name.
+ * @param overwrite True if any existing directory should be
+ * overwritten. Defaults to false.
+*/
+void RModel_TorchGNN::Save(std::string path, std::string name, bool overwrite /*=false*/) {
+    std::string dir = path + "/" + name;
+
+    // Get timestamp.
+    std::string timestamp = GetTimestamp();
+
+    if (std::filesystem::exists(dir)) {
+        if (!overwrite) {
+            // Display warning and stop: the existing directory must stay
+            // untouched.
+            std::cout << "WARNING: Could not save model. Directory " << dir << " exists and overwrite is set to false." << std::endl;
+            return;
+        }
+        // Clean directory.
+        std::filesystem::remove_all(dir);
+    }
+    std::filesystem::create_directories(dir);
+
+    // Write methods.
+    WriteMethods(dir, name, timestamp);
+
+    // Write model.
+    WriteModel(dir, name, timestamp);
+
+    // Write CMakeLists.
+    WriteCMakeLists(dir, name, timestamp);
+
+    // Create parameter directory.
+    std::filesystem::path param_dir = std::filesystem::path(dir);
+    param_dir /= "params";
+    std::filesystem::create_directory(param_dir);
+
+    // Save parameters.
+    for (const auto& m: fModules) {
+        m -> SaveParameters(param_dir.string());
+    }
+}
+
+/**
+ * Write the methods to create a self-contained package.
+ *
+ * The TorchGNN headers and sources are copied into <dir>/inc and <dir>/src.
+ * Every copied file then gets a generation header, loses its namespace lines
+ * and has its include paths adapted to the flat package layout.
+ *
+ * NOTE: This source file is copied and filtered as well. Lines that mention a
+ * namespace pattern inside a line.find call, or a replacement pattern next to
+ * del_string, are recognized by exactly that text and left intact. Keep both
+ * spellings when editing the filter below.
+ *
+ * @param dir Directory to save to.
+ * @param name Model name.
+ * @param timestamp Timestamp.
+*/
+void RModel_TorchGNN::WriteMethods(std::string dir, std::string name, std::string timestamp) {
+    // Retrieve directories.
+    std::filesystem::path src_dir = std::filesystem::path(__FILE__).parent_path();
+    std::filesystem::path inc_dir = src_dir.parent_path().parent_path();
+    inc_dir /= "inc";
+    inc_dir /= "TMVA";
+    inc_dir /= "TorchGNN";
+
+    // Copy header files.
+    std::filesystem::create_directory(dir + "/inc");
+    std::filesystem::copy(inc_dir, dir + "/inc", std::filesystem::copy_options::recursive);
+
+    // Copy source files.
+    std::filesystem::create_directory(dir + "/src");
+    std::filesystem::copy(src_dir, dir + "/src", std::filesystem::copy_options::recursive);
+
+    // Collect the copied files first. Temporary files are created and renamed
+    // below, and it is unspecified whether a running directory iterator
+    // observes such changes.
+    std::vector<std::filesystem::path> files;
+    for (const std::filesystem::directory_entry& entry: std::filesystem::recursive_directory_iterator(dir)) {
+        if (entry.is_regular_file()) {
+            files.push_back(entry.path());
+        }
+    }
+
+    // Iterate over the files to fix the namespaces and other issues.
+    std::string line;
+    for (const std::filesystem::path& file_path: files) {
+        // Load file.
+        std::ifstream fin;
+        fin.open(file_path);
+
+        // Create a temporary file.
+        std::ofstream temp;
+        std::filesystem::path temp_path = file_path;
+        temp_path.replace_filename("temp" + temp_path.extension().string());
+        temp.open(temp_path);
+
+        // Write header.
+        temp << "// Automatically generated for " << name << "." << std::endl;
+        temp << "// " << timestamp << std::endl << std::endl;
+
+        while (std::getline(fin, line)) {
+            if (
+                (
+                    (line.find("namespace TMVA {") == std::string::npos) &&
+                    (line.find("namespace Experimental {") == std::string::npos) &&
+                    (line.find("namespace SOFIE {") == std::string::npos) &&
+                    (line.find("} // SOFIE.") == std::string::npos) &&
+                    (line.find("} // Experimental.") == std::string::npos) &&
+                    (line.find("} // TMVA.") == std::string::npos)
+                )
+                || (line.find("line.find") != std::string::npos)
+            ) {
+                // Not a namespace line, so fix other issues and write
+                // to file.
+                std::string del_string = " TMVA_SOFIE_";
+                if ((line.find(del_string) != std::string::npos) && (line.find("del_string") == std::string::npos)) {
+                    line.replace(line.find(del_string), del_string.size(), " ");
+                }
+                del_string = "\"TMVA/TorchGNN/";
+                if ((line.find(del_string) != std::string::npos) && (line.find("del_string") == std::string::npos)) {
+                    line.replace(line.find(del_string), del_string.size(), "\"");
+                }
+                del_string = "\"modules/";
+                if ((file_path.filename().string().find("RModule_") != std::string::npos) &&
+                    (line.find(del_string) != std::string::npos) &&
+                    (line.find("del_string") == std::string::npos)) {
+                    line.replace(line.find(del_string), del_string.size(), "\"");
+                }
+                del_string = "gsl/gsl_cblas.h";
+                if ((line.find(del_string) != std::string::npos) && (line.find("del_string") == std::string::npos)) {
+                    line.replace(line.find(del_string), del_string.size(), "cblas.h");
+                }
+                temp << line << '\n';
+            }
+        }
+        temp << std::endl;
+
+        fin.close();
+        temp.close();
+
+        // Replace the original file with the rewritten copy.
+        std::filesystem::rename(temp_path, file_path);
+    }
+}
+
+/**
+ * Write the model to a file.
+ *
+ * @param dir Directory to save to.
+ * @param name Model name.
+ * @param timestamp Timestamp.
+*/
+void RModel_TorchGNN::WriteModel(std::string dir, std::string name, std::string timestamp) {
+    std::ofstream model;
+    model.open(dir + "/inc/" + name + ".hxx");
+
+    // Write header.
+    model << "// Automatically generated for " << name << "." << std::endl;
+    model << "// " << timestamp << std::endl << std::endl;
+    model << "/** Model definition. */" << std::endl << std::endl;
+
+    // Write includes: one per distinct module operation. Parameters are not
+    // saved here; the previous call targeted a directory that was never
+    // created, and Save already writes them to the params directory.
+    model << "#include \"RModel_TorchGNN.hxx\"" << std::endl;
+    std::set<std::string_view> used_modules;
+    for (const auto& m: fModules) {
+        used_modules.insert(m -> GetOperation());
+    }
+    for (std::string_view m: used_modules) {
+        model << "#include \"modules/RModule_" << m << ".hxx\"" << std::endl;
+    }
+
+    model << std::endl;
+
+    // Construct model.
+    model << "class " << name << ": public RModel_TorchGNN {" << std::endl;
+    model << "\tpublic:" << std::endl;
+
+    // Write model construction. The separator is empty before the first
+    // element of a list and ", " afterwards.
+    model << "\t\t" << name << "(): RModel_TorchGNN({";
+    const char* sep = "";
+    for (const auto& in: fInputs) { // Input names.
+        model << sep << "\"" << in << "\"";
+        sep = ", ";
+    }
+    model << "}, {";
+    sep = "";
+    for (const auto& in_shape: fShapes) { // Input shapes.
+        model << sep << "{";
+        const char* dim_sep = "";
+        for (int dim: in_shape) {
+            model << dim_sep << dim;
+            dim_sep = ", ";
+        }
+        model << "}";
+        sep = ", ";
+    }
+    model << "}) {" << std::endl;
+
+    // Write module additions.
+    for (const auto& m: fModules) {
+        if ((m -> GetOperation()) == "Input") {
+            // Skip input modules.
+            continue;
+        }
+
+        std::string_view module_name = m -> GetName();
+        std::string_view op = m -> GetOperation();
+        model << "\t\t\tAddModule(RModule_" << op << "(";
+        sep = "";
+        for (const auto& in: m -> GetInputs()) { // Input names.
+            model << sep << "\"" << in << "\"";
+            sep = ", ";
+        }
+        for (const auto& arg: m -> GetArgs()) { // Other arguments.
+            model << ", " << arg;
+        }
+        model << "), \"" << module_name << "\");" << std::endl;
+    }
+    // Write parameter loading.
+    model << "\t\t\tLoadParameters();" << std::endl;
+
+    model << "\t\t}" << std::endl;
+    model << "};" << std::endl;
+    model.close();
+}
+
+/**
+ * Write the CMakeLists file.
+ *
+ * The file declares a library named after the model, built from the model
+ * header and the RModel_TorchGNN header and source, with the inc directory
+ * as public include directory.
+ *
+ * @param dir Directory to save to.
+ * @param name Model name.
+ * @param timestamp Timestamp.
+*/
+void RModel_TorchGNN::WriteCMakeLists(std::string dir, std::string name, std::string timestamp) {
+    std::ofstream f;
+    f.open(dir + "/CMakeLists.txt");
+
+    // Write header.
+    f << "# Automatically generated for " << name << "." << std::endl;
+    f << "# " << timestamp << std::endl << std::endl;
+
+    f << "add_library(" << std::endl;
+    f << "\t" << name << std::endl;
+    f << "\tinc/" << name << ".hxx" << std::endl;
+    f << "\tinc/RModel_TorchGNN.hxx" << std::endl;
+    f << "\tsrc/RModel_TorchGNN.cxx" << std::endl;
+    f << ")" << std::endl << std::endl;
+
+    f << "target_include_directories(" << name << " PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/inc)" << std::endl;
+    f.close();
+}
+
+} // SOFIE.
+} // Experimental.
+} // TMVA.
diff --git a/tmva/sofie/test/TorchGNN/AddTest.cxx b/tmva/sofie/test/TorchGNN/AddTest.cxx
new file mode 100644
index 0000000000000..f048a1920121d
--- /dev/null
+++ b/tmva/sofie/test/TorchGNN/AddTest.cxx
@@ -0,0 +1,73 @@
+/**
+ * Tests the TorchGNN addition module (RModule_Add).
+ *
+ * To run in ROOT terminal:
+ * .L path_to_root/tmva/sofie/test/TorchGNN/AddTest.cxx
+ * main()
+*/
+
+#include "TMVA/TorchGNN/RModel_TorchGNN.hxx"
+#include "TMVA/TorchGNN/modules/RModule_Add.hxx"
+#include <iostream>
+
+using namespace TMVA::Experimental::SOFIE;
+
+/**
+ * Print the expected and the actual values, one value per line.
+ *
+ * @param expected The expected values.
+ * @param actual The values produced by the model.
+ */
+static void PrintComparison(const std::vector<float>& expected, const std::vector<float>& actual) {
+    std::cout << "Expected:" << std::endl;
+    for (float x: expected) {
+        std::cout << x << std::endl;
+    }
+    std::cout << std::endl;
+    std::cout << "Actual:" << std::endl;
+    for (float x: actual) {
+        std::cout << x << std::endl;
+    }
+}
+
+int main() {
+    std::vector<float> a = {1, 1.5, 2, 2.5};
+    std::vector<float> b = {1, 2, 4, 8};
+    std::vector<float> c = {0, 0.5, 0, 0.5};
+
+    // Single addition: a + b.
+    std::vector<float> expected = {2, 3.5, 6, 10.5};
+
+    RModel_TorchGNN model = RModel_TorchGNN({"a", "b"}, {{-1}, {-1}});
+    model.AddModule(RModule_Add("a", "b"), "out_1");
+    std::vector<float> out = model.Forward(a, b);
+
+    PrintComparison(expected, out);
+    std::cout << std::endl;
+
+    // Chained additions: (a + b) + c.
+    expected = {2, 4, 6, 11};
+
+    model = RModel_TorchGNN({"a", "b", "c"}, {{-1}, {-1}, {-1}});
+    model.AddModule(RModule_Add("a", "b"), "out_1");
+    model.AddModule(RModule_Add("out_1", "c"), "out_2");
+    out = model.Forward(a, b, c);
+
+    PrintComparison(expected, out);
+    std::cout << std::endl;
+
+    // Duplicate input name: the second input is referred to as "a_1" below.
+    // NOTE(review): presumably the model renames duplicates itself; confirm
+    // against RModel_TorchGNN.
+    expected = {2, 4, 6, 11};
+
+    model = RModel_TorchGNN({"a", "a", "c"}, {{-1}, {-1}, {-1}});
+    model.AddModule(RModule_Add("a", "a_1"), "out_1");
+    model.AddModule(RModule_Add("out_1", "c"), "out_2");
+    out = model.Forward(a, b, c);
+
+    PrintComparison(expected, out);
+
+    return 0;
+}
diff --git a/tmva/sofie/test/TorchGNN/CatTest.cxx b/tmva/sofie/test/TorchGNN/CatTest.cxx
new file mode 100644
index 0000000000000..5d703110281e5
--- /dev/null
+++ b/tmva/sofie/test/TorchGNN/CatTest.cxx
@@ -0,0 +1,128 @@
+/**
+ * Tests the TorchGNN concatenation module (RModule_Cat).
+ *
+ * To run in ROOT terminal:
+ * .L path_to_root/tmva/sofie/test/TorchGNN/CatTest.cxx
+ * main()
+*/
+
+#include "TMVA/TorchGNN/RModel_TorchGNN.hxx"
+#include "TMVA/TorchGNN/modules/RModule_Cat.hxx"
+#include <iostream>
+
+using namespace TMVA::Experimental::SOFIE;
+
+/**
+ * Print the expected and the actual values, one value per line.
+ *
+ * @param expected The expected values.
+ * @param actual The values produced by the model.
+ */
+static void PrintComparison(const std::vector<float>& expected, const std::vector<float>& actual) {
+    std::cout << "Expected:" << std::endl;
+    for (float x: expected) {
+        std::cout << x << std::endl;
+    }
+    std::cout << std::endl;
+    std::cout << "Actual:" << std::endl;
+    for (float x: actual) {
+        std::cout << x << std::endl;
+    }
+}
+
+int main() {
+    // 2x3x2.
+    std::vector<float> a = {
+        1, 2,
+        3, 4,
+        5, 6,
+        7, 8,
+        9, 10,
+        11, 12
+    };
+    // 1x3x2.
+    std::vector<float> b = {
+        -1, -2,
+        -3, -4,
+        -5, -6
+    };
+
+    // 3x3x2.
+    std::vector<float> expected = {
+        1, 2,
+        3, 4,
+        5, 6,
+        7, 8,
+        9, 10,
+        11, 12,
+        -1, -2,
+        -3, -4,
+        -5, -6
+    };
+
+    // Concatenate along dimension 0.
+    RModel_TorchGNN model = RModel_TorchGNN({"a", "b"}, {{-1, 3, 2}, {-1, 3, 2}});
+    model.AddModule(RModule_Cat("a", "b", 0), "out_1");
+    std::vector<float> out = model.Forward(a, b);
+
+    PrintComparison(expected, out);
+    std::cout << std::endl;
+
+    // 2x1x2.
+    b = {
+        -1, -2,
+        -3, -4
+    };
+
+    // 2x4x2.
+    expected = {
+        1, 2,
+        3, 4,
+        5, 6,
+        -1, -2,
+        7, 8,
+        9, 10,
+        11, 12,
+        -3, -4
+    };
+
+    // Concatenate along dimension 1.
+    model = RModel_TorchGNN({"a", "b"}, {{-1, 3, 2}, {-1, 1, 2}});
+    model.AddModule(RModule_Cat("a", "b", 1), "out_1");
+    out = model.Forward(a, b);
+
+    PrintComparison(expected, out);
+    std::cout << std::endl;
+
+    // 2x3x1.
+    b = {
+        -1,
+        -2,
+        -3,
+        -4,
+        -5,
+        -6
+    };
+
+    // 2x3x3.
+    expected = {
+        1, 2, -1,
+        3, 4, -2,
+        5, 6, -3,
+        7, 8, -4,
+        9, 10, -5,
+        11, 12, -6
+    };
+
+    // Concatenate along dimension 2.
+    model = RModel_TorchGNN({"a", "b"}, {{-1, 3, 2}, {-1, 3, 1}});
+    model.AddModule(RModule_Cat("a", "b", 2), "out_1");
+    out = model.Forward(a, b);
+
+    PrintComparison(expected, out);
+
+    return 0;
+}
diff --git a/tmva/sofie/test/TorchGNN/GlobalMeanPoolTest.cxx b/tmva/sofie/test/TorchGNN/GlobalMeanPoolTest.cxx
new file mode 100644
index 0000000000000..32b19038c9369
--- /dev/null
+++ b/tmva/sofie/test/TorchGNN/GlobalMeanPoolTest.cxx
@@ -0,0 +1,47 @@
+/**
+ * Tests the TorchGNN global mean pooling module (RModule_GlobalMeanPool).
+ *
+ * To run in ROOT terminal:
+ * .L path_to_root/tmva/sofie/test/TorchGNN/GlobalMeanPoolTest.cxx
+ * main()
+*/
+
+#include "TMVA/TorchGNN/RModel_TorchGNN.hxx"
+#include "TMVA/TorchGNN/modules/RModule_GlobalMeanPool.hxx"
+#include <iostream>
+
+using namespace TMVA::Experimental::SOFIE;
+
+int main() {
+    // Six nodes with two features each.
+    std::vector<float> X = {
+        1, 2,
+        -1, -2,
+        3, -6,
+        1, 2,
+        2, 1,
+        8, 8
+    };
+    // Graph index of every node.
+    // NOTE(review): element type assumed to be float like the other model
+    // inputs; confirm against RModel_TorchGNN::Forward.
+    std::vector<float> batch = {0, 0, 0, 1, 1, 2};
+
+    // Feature means per graph.
+    std::vector<float> expected = {
+        1, -2,
+        1.5, 1.5,
+        8, 8
+    };
+
+    RModel_TorchGNN model = RModel_TorchGNN({"X", "batch"}, {{-1, 2}, {-1}});
+    model.AddModule(RModule_GlobalMeanPool("X", "batch"), "out_1");
+    std::vector<float> out = model.Forward(X, batch);
+
+    std::cout << "Expected:" << std::endl;
+    for (float x: expected) {
+        std::cout << x << std::endl;
+    }
+    std::cout << std::endl;
+    std::cout << "Actual:" << std::endl;
+    for (float x: out) {
+        std::cout << x << std::endl;
+    }
+
+    return 0;
+}
diff --git a/tmva/sofie/test/TorchGNN/LinearTest.cxx b/tmva/sofie/test/TorchGNN/LinearTest.cxx
new file mode 100644
index 0000000000000..b6618ab9645a5
--- /dev/null
+++ b/tmva/sofie/test/TorchGNN/LinearTest.cxx
@@ -0,0 +1,52 @@
+/**
+ * Tests the TorchGNN linear
module (RModule_Linear).
+ *
+ * To run in ROOT terminal:
+ * .L path_to_root/tmva/sofie/test/TorchGNN/LinearTest.cxx
+ * main()
+*/
+
+#include "TMVA/TorchGNN/RModel_TorchGNN.hxx"
+#include "TMVA/TorchGNN/modules/RModule_Linear.hxx"
+#include <iostream>
+
+using namespace TMVA::Experimental::SOFIE;
+
+int main() {
+    // Two samples with three features each.
+    std::vector<float> X = {
+        1, 2, 3,
+        3, -2, 2
+    };
+    // Weights: two output features by three input features.
+    std::vector<float> A = {
+        1, -1, 2,
+        0, 2, 1
+    };
+    // One bias per output feature.
+    std::vector<float> b = {
+        0,
+        0.5
+    };
+
+    // X times the transposed weights, plus the bias.
+    std::vector<float> expected = {
+        5, 7.5,
+        9, -1.5
+    };
+
+    RModel_TorchGNN model = RModel_TorchGNN({"X"}, {{-1, 3}});
+    RModule_Linear lin = RModule_Linear("X", 3, 2);
+    lin.SetWeights(A);
+    lin.SetBiases(b);
+    model.AddModule(lin, "out_1");
+    std::vector<float> out = model.Forward(X);
+
+    std::cout << "Expected:" << std::endl;
+    for (float x: expected) {
+        std::cout << x << std::endl;
+    }
+    std::cout << std::endl;
+    std::cout << "Actual:" << std::endl;
+    for (float x: out) {
+        std::cout << x << std::endl;
+    }
+
+    return 0;
+}
diff --git a/tmva/sofie/test/TorchGNN/ReLUTest.cxx b/tmva/sofie/test/TorchGNN/ReLUTest.cxx
new file mode 100644
index 0000000000000..797c42d894570
--- /dev/null
+++ b/tmva/sofie/test/TorchGNN/ReLUTest.cxx
@@ -0,0 +1,35 @@
+/**
+ * Tests the TorchGNN ReLU module (RModule_ReLU).
+ *
+ * To run in ROOT terminal:
+ * .L path_to_root/tmva/sofie/test/TorchGNN/ReLUTest.cxx
+ * main()
+*/
+
+#include "TMVA/TorchGNN/RModel_TorchGNN.hxx"
+#include "TMVA/TorchGNN/modules/RModule_ReLU.hxx"
+#include <iostream>
+
+using namespace TMVA::Experimental::SOFIE;
+
+int main() {
+    std::vector<float> a = {-2, -1, 0, 1, 2};
+
+    // Negative values are clamped to zero.
+    std::vector<float> expected = {0, 0, 0, 1, 2};
+
+    RModel_TorchGNN model = RModel_TorchGNN({"a"}, {{-1}});
+    model.AddModule(RModule_ReLU("a"), "out_1");
+    std::vector<float> out = model.Forward(a);
+
+    std::cout << "Expected:" << std::endl;
+    for (float x: expected) {
+        std::cout << x << std::endl;
+    }
+    std::cout << std::endl;
+    std::cout << "Actual:" << std::endl;
+    for (float x: out) {
+        std::cout << x << std::endl;
+    }
+
+    return 0;
+}
diff --git a/tmva/sofie/test/TorchGNN/ReshapeTest.cxx b/tmva/sofie/test/TorchGNN/ReshapeTest.cxx
new file mode 100644
index 0000000000000..73f71f1bdcc10
--- /dev/null
+++ b/tmva/sofie/test/TorchGNN/ReshapeTest.cxx
@@ -0,0 +1,64 @@
+/**
+ * Tests the TorchGNN reshape module (RModule_Reshape).
+ *
+ * To run in ROOT terminal:
+ * .L path_to_root/tmva/sofie/test/TorchGNN/ReshapeTest.cxx
+ * main()
+*/
+
+#include "TMVA/TorchGNN/RModel_TorchGNN.hxx"
+#include "TMVA/TorchGNN/modules/RModule_Cat.hxx"
+#include "TMVA/TorchGNN/modules/RModule_Reshape.hxx"
+#include <iostream>
+
+using namespace TMVA::Experimental::SOFIE;
+
+int main() {
+    // 3x2.
+    std::vector<float> a = {
+        1, 2,
+        3, 4,
+        5, 6
+    };
+    // 1x3.
+    std::vector<float> b = {
+        -1, -2, -3
+    };
+
+    std::vector<float> expected = {
+        1, 2, -1,
+        3, 4, -2,
+        5, 6, -3
+    };
+
+    // Reshape b to a single column, then append it to a along dimension 1.
+    RModel_TorchGNN model = RModel_TorchGNN({"a", "b"}, {{-1, 2}, {-1, 3}});
+    std::vector<int> shape = {-1, 1};
+    model.AddModule(RModule_Reshape("b", shape), "out_1");
+    model.AddModule(RModule_Cat("a", "out_1", 1), "out_2");
+    std::vector<float> out = model.Forward(a, b);
+
+    std::cout << "Expected:" << std::endl;
+    for (float x: expected) {
+        std::cout << x << std::endl;
+    }
+    std::cout << std::endl;
+    std::cout << "Actual:" << std::endl;
+    for (float x: out) {
+        std::cout << x << std::endl;
+    }
+    std::cout << std::endl;
+
+    // Without the reshape, the shapes of a and b do not match.
+    model = RModel_TorchGNN({"a", "b"}, {{-1, 2}, {-1, 3}});
+    model.AddModule(RModule_Cat("a", "b", 1), "out_1");
+
+    std::cout << "Invalid shapes should throw an error." << std::endl;
+    std::cout << std::endl;
+
+    try {
+        out = model.Forward(a, b);
+    } catch (const std::string& error) {
+        // Errors are caught as strings; print the message.
+        std::cout << error << std::endl;
+    }
+
+    return 0;
+}
diff --git a/tmva/sofie/test/TorchGNN/SaveTest.cxx b/tmva/sofie/test/TorchGNN/SaveTest.cxx
new file mode 100644
index 0000000000000..ed44526533602
--- /dev/null
+++ b/tmva/sofie/test/TorchGNN/SaveTest.cxx
@@ -0,0 +1,25 @@
+/**
+ * Tests the saving functionality of TorchGNN.
+ *
+ * To run in ROOT terminal:
+ * .L path_to_root/tmva/sofie/test/TorchGNN/SaveTest.cxx
+ * main()
+*/
+
+#include "TMVA/TorchGNN/RModel_TorchGNN.hxx"
+#include "TMVA/TorchGNN/modules/RModule_Add.hxx"
+#include <filesystem>
+#include <iostream>
+#include <string>
+
+using namespace TMVA::Experimental::SOFIE;
+
+int main() {
+    RModel_TorchGNN model = RModel_TorchGNN({"a", "b", "c"}, {{-1}, {-1}, {-1}});
+    model.AddModule(RModule_Add("a", "b"), "out_1");
+    model.AddModule(RModule_Add("out_1", "c"), "out_2");
+
+    // Save below the system temporary directory so that the test does not
+    // depend on a particular user's home directory.
+    const std::string save_dir = (std::filesystem::temp_directory_path() / "root-model").string();
+    model.Save(save_dir, "Model", true);
+    std::cout << "Model saved to " << save_dir << "/Model" << std::endl;
+
+    return 0;
+}
diff --git a/tmva/sofie/test/TorchGNN/SoftmaxTest.cxx b/tmva/sofie/test/TorchGNN/SoftmaxTest.cxx
new file mode 100644
index 0000000000000..9607f754dad67
--- /dev/null
+++ b/tmva/sofie/test/TorchGNN/SoftmaxTest.cxx
@@ -0,0 +1,47 @@
+/**
+ * Tests the TorchGNN softmax module (RModule_Softmax).
+ *
+ * To run in ROOT terminal:
+ * .L path_to_root/tmva/sofie/test/TorchGNN/SoftmaxTest.cxx
+ * main()
+*/
+
+#include "TMVA/TorchGNN/RModel_TorchGNN.hxx"
+#include "TMVA/TorchGNN/modules/RModule_Softmax.hxx"
+#include <iostream>
+
+using namespace TMVA::Experimental::SOFIE;
+
+int main() {
+    // 4x3.
+    std::vector<float> a = {
+        0, 3, -2,
+        0, 3, -2,
+        1, 2, 3,
+        10, -1, -1
+    };
+
+    // 4x3, row-wise softmax rounded to four decimals.
+    std::vector<float> expected = {
+        0.0471, 0.9465, 0.0064,
+        0.0471, 0.9465, 0.0064,
+        0.0900, 0.2447, 0.6652,
+        1.0000, 0.0000, 0.0000
+    };
+
+    RModel_TorchGNN model = RModel_TorchGNN({"a"}, {{-1, 3}});
+    model.AddModule(RModule_Softmax("a"), "out_1");
+    std::vector<float> out = model.Forward(a);
+
+    std::cout << "Expected:" << std::endl;
+    for (float x: expected) {
+        std::cout << x << std::endl;
+    }
+    std::cout << std::endl;
+    std::cout << "Actual:" << std::endl;
+    for (float x: out) {
+        std::cout << x << std::endl;
+    }
+
+    return 0;
+}