diff --git a/.github/workflows/tests_clang.yml b/.github/workflows/tests_clang.yml index 1b6c3ef47e6..5b2480a6dc6 100644 --- a/.github/workflows/tests_clang.yml +++ b/.github/workflows/tests_clang.yml @@ -44,6 +44,7 @@ jobs: -DSINGULARITY_TEST_HELMHOLTZ=ON \ -DSINGULARITY_FORCE_SUBMODULE_MODE=ON \ -DSINGULARITY_USE_V_AND_V_EOS=OFF \ + -DSINGULARITY_VECTOR_CAPTURE_BY_REFERENCE=ON \ -DSINGULARITY_USE_KOKKOS=OFF \ -DSINGULARITY_PLUGINS=$(pwd)/../example/plugin \ -DCMAKE_LINKER=ld.gold \ diff --git a/CHANGELOG.md b/CHANGELOG.md index 551ec87511a..5542f929b14 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## Current develop ### Added (new features/APIs/variables/...) +- [[PR511]](https://github.com/lanl/singularity-eos/pull/511) Added ability to sometimes capture by reference in vector API - [[PR506]](https://github.com/lanl/singularity-eos/pull/506) Add two examples related to PTE ### Fixed (Repair bugs, etc) diff --git a/CMakeLists.txt b/CMakeLists.txt index ef020c2da49..7f3ce76828a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -69,6 +69,9 @@ cmake_dependent_option( SINGULARITY_USE_KOKKOSKERNELS "Use KokkosKernels for LA routines" ON "SINGULARITY_USE_KOKKOS;SINGULARITY_BUILD_CLOSURE" OFF) +option(SINGULARITY_VECTOR_CAPTURE_BY_REFERENCE + "Capture by reference in vector API on CPUs when possible" OFF) + # extra build options option(SINGULARITY_BUILD_PYTHON "Compile Python bindings" OFF) option(SINGULARITY_BUILD_EXAMPLES "Compile examples" OFF) @@ -361,6 +364,13 @@ endif() if (SINGULARITY_USE_EOSPAC AND SINGULARITY_EOSPAC_ENABLE_SHMEM) target_compile_definitions(singularity-eos_Interface INTERFACE SINGULARITY_EOSPAC_ENABLE_SHARED_MEMORY) endif() +if (SINGULARITY_VECTOR_CAPTURE_BY_REFERENCE) + message(WARNING "Vector capture by reference is ON for CPU builds. " + "This is not technically valid Kokkos and may cause issues in edge cases. 
" + "To disable, set -DSINGULARITY_VECTOR_CAPTURE_BY_REFERENCE=OFF.") + target_compile_definitions(singularity-eos_Interface INTERFACE + SINGULARITY_VECTOR_CAPTURE_BY_REFERENCE) +endif() # ------------------------------------------------------------------------------# # Handle dependencies diff --git a/doc/sphinx/src/building.rst b/doc/sphinx/src/building.rst index 29a7b2709d8..0f8c6603bf8 100644 --- a/doc/sphinx/src/building.rst +++ b/doc/sphinx/src/building.rst @@ -98,30 +98,31 @@ sections detailing those build modes. The main CMake options to configure building are in the following table: -====================================== ======= =========================================== - Option Default Comment -====================================== ======= =========================================== - ``SINGULARITY_USE_SPINER`` ON Enables EOS objects that use ``spiner``. - ``SINGULARITY_USE_FORTRAN`` ON Enable Fortran API for equation of state. - ``SINGULARITY_USE_KOKKOS`` OFF Uses Kokkos as the portability backend. Currently only Kokkos is supported for GPUs. - ``SINGULARITY_USE_EOSPAC`` OFF Link against EOSPAC. Needed for sesame2spiner and some tests. - ``SINGULARITY_EOSPAC_ENABLE_SHMEM`` OFF Enable shared memory support in EOSPAC backend. - ``SINGULARITY_BUILD_CLOSURE`` OFF Build the mixed cell closure models - ``SINGULARITY_BUILD_TESTS`` OFF Build test infrastructure. - ``SINGULARITY_BUILD_PYTHON`` OFF Build Python bindings. - ``SINGULARITY_BUILD_EXAMPLES`` OFF Build examples of ``singularity-eos`` in use. - ``SINGULARITY_INVERT_AT_SETUP`` OFF For tests, pre-invert eospac tables. - ``SINGULARITY_BETTER_DEBUG_FLAGS`` ON Enables nicer GPU debug flags. May interfere with in-tree builds as a submodule. - ``SINGULARITY_HIDE_MORE_WARNINGS`` OFF Makes warnings less verbose. May interfere with in-tree builds as a submodule. - ``SINGULARITY_FORCE_SUBMODULE_MODE`` OFF Force build in _submodule_ mode. 
- ``SINGULARITY_USE_TRUE_LOG_GRIDDING`` OFF Use grids that conform to logarithmic spacing. - ``SINGULARITY_USE_SINGLE_LOGS`` OFF Use single precision logarithms (may degrade accuracy). - ``SINGULARITY_NQT_ORDER_1`` OFF For fast logs, use the less accurate but faster 1st-order version. - ``SINGULARITY_NQT_PORTABLE`` OFF For fast logs, use the slower but endianness-independent implementation. - ``SINGULARITY_STRICT_WARNINGS`` OFF For testing. Adds -Wall and -Werror to builds. - ``SINGULARITY_USE_V_AND_V_EOS`` OFF Enables several additional EOS models and adds them to the default variant - ``SINGULARITY_USE_STELLAR_COLLAPSE`` OFF Adds the Stellar Collapse EOS to the default variant -====================================== ======= =========================================== +============================================= ======= =========================================== + Option Default Comment +============================================= ======= =========================================== + ``SINGULARITY_USE_SPINER`` ON Enables EOS objects that use ``spiner``. + ``SINGULARITY_USE_FORTRAN`` ON Enable Fortran API for equation of state. + ``SINGULARITY_USE_KOKKOS`` OFF Uses Kokkos as the portability backend. Currently only Kokkos is supported for GPUs. + ``SINGULARITY_USE_EOSPAC`` OFF Link against EOSPAC. Needed for sesame2spiner and some tests. + ``SINGULARITY_EOSPAC_ENABLE_SHMEM`` OFF Enable shared memory support in EOSPAC backend. + ``SINGULARITY_BUILD_CLOSURE`` OFF Build the mixed cell closure models + ``SINGULARITY_BUILD_TESTS`` OFF Build test infrastructure. + ``SINGULARITY_BUILD_PYTHON`` OFF Build Python bindings. + ``SINGULARITY_BUILD_EXAMPLES`` OFF Build examples of ``singularity-eos`` in use. + ``SINGULARITY_INVERT_AT_SETUP`` OFF For tests, pre-invert eospac tables. + ``SINGULARITY_BETTER_DEBUG_FLAGS`` ON Enables nicer GPU debug flags. May interfere with in-tree builds as a submodule. + ``SINGULARITY_HIDE_MORE_WARNINGS`` OFF Makes warnings less verbose. 
May interfere with in-tree builds as a submodule. + ``SINGULARITY_FORCE_SUBMODULE_MODE`` OFF Force build in _submodule_ mode. + ``SINGULARITY_USE_TRUE_LOG_GRIDDING`` OFF Use grids that conform to logarithmic spacing. + ``SINGULARITY_USE_SINGLE_LOGS`` OFF Use single precision logarithms (may degrade accuracy). + ``SINGULARITY_NQT_ORDER_1`` OFF For fast logs, use the less accurate but faster 1st-order version. + ``SINGULARITY_NQT_PORTABLE`` OFF For fast logs, use the slower but endianness-independent implementation. + ``SINGULARITY_STRICT_WARNINGS`` OFF For testing. Adds -Wall and -Werror to builds. + ``SINGULARITY_USE_V_AND_V_EOS`` OFF Enables several additional EOS models and adds them to the default variant + ``SINGULARITY_USE_STELLAR_COLLAPSE`` OFF Adds the Stellar Collapse EOS to the default variant + ``SINGULARITY_VECTOR_CAPTURE_BY_REFERENCE`` OFF A performance trick for CPU-only builds. May improve vector-API calls for small vector lengths. Enable at your own risk! +============================================= ======= =========================================== More options are available to modify only if certain other options or variables satisfy certain conditions (*dependent options*). *Dependent diff --git a/singularity-eos/eos/eos_base.hpp b/singularity-eos/eos/eos_base.hpp index 2eff349b450..93b253587ee 100644 --- a/singularity-eos/eos/eos_base.hpp +++ b/singularity-eos/eos/eos_base.hpp @@ -65,6 +65,26 @@ char *StrCat(char *destination, const char *source) { } } // namespace impl +// JMM: This is a little trick to skip copying EOS's when not +// necessary. It changes the semantics of the lambda to capture by +// reference *only* on CPU-only builds. Should buy performance, but is +// a bit of a hack. I consider it dangerous because it changes the +// semantics/memory model between CPU and GPU, which may be difficult +// to debug. It is thus off by default, but can be turned on. 
+#if defined(SINGULARITY_VECTOR_CAPTURE_BY_REFERENCE) && !defined(__CUDACC__) && \ + !defined(__HIPCC__) +#if !defined(PORTABILITY_STRATEGY_KOKKOS) && !defined(PORTABILITY_STRATEGY_NONE) +#error \ + "Unexpected portability strategy! Please set SINGULARITY_VECTOR_CAPTURE_BY_REFERENCE=OFF to build in this mode." +#endif // unexpected portability strategy! +// JMM: I am hoping that by not adding any decorators here, we will +// catch problems with unexpected, non-Cuda, non-HIP accelerator +// architectures. +#define SG_VEC_LAMBDA [&] +#else +#define SG_VEC_LAMBDA PORTABLE_LAMBDA +#endif // SINGULARITY_VECTOR_CAPTURE_BY_REFERENCE + // This Macro adds the `using` statements that allow for the base class // VECTOR functionality to overload the scalar implementations in the derived // classes. Do not add functions here that are not overloads of derived class features. @@ -274,7 +294,7 @@ class EosBase { static auto const cname = name.c_str(); const CRTP © = *(static_cast(this)); portableFor( - cname, 0, num, PORTABLE_LAMBDA(const int i) { + cname, 0, num, SG_VEC_LAMBDA(const int i) { temperatures[i] = copy.TemperatureFromDensityInternalEnergy(rhos[i], sies[i], lambdas[i]); }); @@ -309,7 +329,7 @@ class EosBase { static auto const cname = name.c_str(); const CRTP © = *(static_cast(this)); portableFor( - cname, 0, num, PORTABLE_LAMBDA(const int i) { + cname, 0, num, SG_VEC_LAMBDA(const int i) { sies[i] = copy.InternalEnergyFromDensityTemperature(rhos[i], temperatures[i], lambdas[i]); }); @@ -344,7 +364,7 @@ class EosBase { static auto const cname = name.c_str(); const CRTP © = *(static_cast(this)); portableFor( - cname, 0, num, PORTABLE_LAMBDA(const int i) { + cname, 0, num, SG_VEC_LAMBDA(const int i) { pressures[i] = copy.PressureFromDensityTemperature(rhos[i], temperatures[i], lambdas[i]); }); @@ -377,7 +397,7 @@ class EosBase { static auto const cname = name.c_str(); const CRTP © = *(static_cast(this)); portableFor( - cname, 0, num, PORTABLE_LAMBDA(const int i) { + 
cname, 0, num, SG_VEC_LAMBDA(const int i) { pressures[i] = copy.PressureFromDensityInternalEnergy(rhos[i], sies[i], lambdas[i]); }); @@ -408,7 +428,7 @@ class EosBase { static auto const cname = name.c_str(); const CRTP © = *(static_cast(this)); portableFor( - cname, 0, num, PORTABLE_LAMBDA(const int i) { + cname, 0, num, SG_VEC_LAMBDA(const int i) { sies[i] = copy.MinInternalEnergyFromDensity(rhos[i], lambdas[i]); }); } @@ -438,7 +458,7 @@ class EosBase { static auto const cname = name.c_str(); const CRTP © = *(static_cast(this)); portableFor( - cname, 0, num, PORTABLE_LAMBDA(const int i) { + cname, 0, num, SG_VEC_LAMBDA(const int i) { entropies[i] = copy.EntropyFromDensityTemperature(rhos[i], temperatures[i], lambdas[i]); }); @@ -471,7 +491,7 @@ class EosBase { static auto const cname = name.c_str(); const CRTP © = *(static_cast(this)); portableFor( - cname, 0, num, PORTABLE_LAMBDA(const int i) { + cname, 0, num, SG_VEC_LAMBDA(const int i) { entropies[i] = copy.EntropyFromDensityInternalEnergy(rhos[i], sies[i], lambdas[i]); }); @@ -503,7 +523,7 @@ class EosBase { static auto const cname = name.c_str(); const CRTP © = *(static_cast(this)); portableFor( - cname, 0, num, PORTABLE_LAMBDA(const int i) { + cname, 0, num, SG_VEC_LAMBDA(const int i) { cvs[i] = copy.SpecificHeatFromDensityTemperature(rhos[i], temperatures[i], lambdas[i]); }); @@ -538,7 +558,7 @@ class EosBase { static auto const cname = name.c_str(); const CRTP © = *(static_cast(this)); portableFor( - cname, 0, num, PORTABLE_LAMBDA(const int i) { + cname, 0, num, SG_VEC_LAMBDA(const int i) { cvs[i] = copy.SpecificHeatFromDensityInternalEnergy(rhos[i], sies[i], lambdas[i]); }); @@ -571,7 +591,7 @@ class EosBase { static auto const cname = name.c_str(); const CRTP © = *(static_cast(this)); portableFor( - cname, 0, num, PORTABLE_LAMBDA(const int i) { + cname, 0, num, SG_VEC_LAMBDA(const int i) { bmods[i] = copy.BulkModulusFromDensityTemperature(rhos[i], temperatures[i], lambdas[i]); }); @@ -606,7 +626,7 @@ 
class EosBase { static auto const cname = name.c_str(); const CRTP © = *(static_cast(this)); portableFor( - cname, 0, num, PORTABLE_LAMBDA(const int i) { + cname, 0, num, SG_VEC_LAMBDA(const int i) { bmods[i] = copy.BulkModulusFromDensityInternalEnergy(rhos[i], sies[i], lambdas[i]); }); @@ -638,7 +658,7 @@ class EosBase { static auto const cname = name.c_str(); CRTP copy = *(static_cast(this)); portableFor( - cname, 0, num, PORTABLE_LAMBDA(const int i) { + cname, 0, num, SG_VEC_LAMBDA(const int i) { gm1s[i] = copy.GruneisenParamFromDensityTemperature(rhos[i], temperatures[i], lambdas[i]); }); @@ -673,7 +693,7 @@ class EosBase { static auto const cname = name.c_str(); CRTP copy = *(static_cast(this)); portableFor( - cname, 0, num, PORTABLE_LAMBDA(const int i) { + cname, 0, num, SG_VEC_LAMBDA(const int i) { gm1s[i] = copy.GruneisenParamFromDensityInternalEnergy(rhos[i], sies[i], lambdas[i]); }); @@ -707,7 +727,7 @@ class EosBase { static auto const cname = name.c_str(); CRTP copy = *(static_cast(this)); portableFor( - cname, 0, num, PORTABLE_LAMBDA(const int i) { + cname, 0, num, SG_VEC_LAMBDA(const int i) { Gs[i] = copy.GibbsFreeEnergyFromDensityTemperature(rhos[i], Ts[i], lambdas[i]); }); } @@ -739,7 +759,7 @@ class EosBase { static auto const cname = name.c_str(); CRTP copy = *(static_cast(this)); portableFor( - cname, 0, num, PORTABLE_LAMBDA(const int i) { + cname, 0, num, SG_VEC_LAMBDA(const int i) { Gs[i] = copy.GibbsFreeEnergyFromDensityInternalEnergy(rhos[i], sies[i], lambdas[i]); }); @@ -774,7 +794,7 @@ class EosBase { static auto const cname = name.c_str(); CRTP copy = *(static_cast(this)); portableFor( - cname, 0, num, PORTABLE_LAMBDA(const int i) { + cname, 0, num, SG_VEC_LAMBDA(const int i) { copy.FillEos(rhos[i], temps[i], energies[i], presses[i], cvs[i], bmods[i], output, lambdas[i]); }); @@ -1032,5 +1052,6 @@ class EosBase { } // namespace eos_base } // namespace singularity +#undef SG_VEC_LAMBDA #undef SG_MEMBER_FUNC_NAME #endif diff --git 
a/spack-repo/packages/singularity-eos/package.py b/spack-repo/packages/singularity-eos/package.py index ffc47b8d266..c17291cc153 100644 --- a/spack-repo/packages/singularity-eos/package.py +++ b/spack-repo/packages/singularity-eos/package.py @@ -87,6 +87,8 @@ class SingularityEos(CMakePackage, CudaPackage, ROCmPackage): variant("closure", default=True, description="Build closure module") variant("shared", default=False, description="Build shared libs") variant("vandv", default=True, description="Enable V&V EOSs in default Singularity::Variant") + variant("vecref", default=False, + description="When possible capture by reference in vector API. Performance optimization.") plugins = {} @@ -210,6 +212,7 @@ def cmake_args(self): self.define_from_variant("SINGULARITY_USE_SPINER_WITH_HDF5", "hdf5"), self.define_from_variant("BUILD_SHARED_LIBS", "shared"), self.define_from_variant("SINGULARITY_USE_V_AND_V_EOS", "vandv"), + self.define_from_variant("SINGULARITY_VECTOR_CAPTURE_BY_REFERENCE", "vecref"), self.define("SINGULARITY_BUILD_TESTS", self.run_tests), self.define( "SINGULARITY_BUILD_SESAME2SPINER",