diff --git a/.clang-format b/.clang-format index ba886a47c..b1990c009 100644 --- a/.clang-format +++ b/.clang-format @@ -82,7 +82,7 @@ IncludeBlocks: Regroup IncludeCategories: - Regex: '^"plssvm/' Priority: 1 - - Regex: '^"(cuda|hip|CL|sycl|omp|hpx|Kokkos)' + - Regex: '^"(cuda|driver_types|vector_types|hip|CL|sycl|omp|hpx|Kokkos)' Priority: 2 - Regex: '^"(tests|bindings)/' Priority: 3 diff --git a/.clang-tidy b/.clang-tidy index b8ef4aa69..872696e54 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,47 +1,58 @@ -# Generated from CLion Inspection settings --- -Checks: '-*, +Checks: ' +bugprone-*, +-bugprone-branch-clone, +-bugprone-easily-swappable-parameters, +-bugprone-exception-escape, +-bugprone-macro-parentheses, + cert-*, -misc-*, -mpi-*, + +clang-analyzer-*, +-clang-analyzer-optin.core.EnumCastOutOfRange, + cppcoreguidelines-*, --cppcoreguidelines-avoid-do-while, --cppcoreguidelines-avoid-magic-numbers, --cppcoreguidelines-pro-bounds-pointer-arithmetic, +-cppcoreguidelines-macro-usage, +-cppcoreguidelines-non-private-member-variables-in-classes, -cppcoreguidelines-pro-bounds-constant-array-index, -google-default-arguments, -google-runtime-operator, -google-explicit-constructor, -hicpp-multiway-paths-covered, -hicpp-exception-baseclass, +-cppcoreguidelines-pro-bounds-pointer-arithmetic, + +google-*, +-google-readability-todo, +-google-runtime-int, + +hicpp-*, +-hicpp-uppercase-literal-suffix, + +misc-*, +-misc-non-private-member-variables-in-classes, +-misc-no-recursion, + modernize-*, -modernize-use-trailing-return-type, --modernize-avoid-c-arrays, --modernize-use-using, --modernize-use-default-member-init, --modernize-macro-to-enum, -portability-simd-intrinsics, +-modernize-type-traits, + +mpi-*, + +openmp-*, +-openmp-exception-escape, +-openmp-use-default-none, + +performance-*, +-performance-avoid-endl, +-performance-enum-size, + +portability-*, +-portability-avoid-pragma-once, + readability-*, --readability-redundant-preprocessor, 
--readability-named-parameter, --readability-function-size, --readability-simplify-boolean-expr, +-readability-convert-member-functions-to-static, +-readability-function-cognitive-complexity, -readability-identifier-length, --readability-duplicate-include, --readability-magic-numbers, --readability-braces-around-statements, --readability-redundant-member-init, +-readability-math-missing-parentheses, -readability-suspicious-call-argument, --readability-qualified-auto, --readability-isolate-declaration, -readability-uppercase-literal-suffix, --readability-container-data-pointer, --readability-else-after-return, --readability-redundant-access-specifiers, --readability-function-cognitive-complexity, --readability-implicit-bool-conversion, --readability-container-contains, --readability-identifier-naming, -bugprone-*' +' -CheckOptions: { misc-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic: true } +UseColor: true +HeaderFilterRegex: ".*(include\\/plssvm|bindings\\/Python|tests).*" \ No newline at end of file diff --git a/.github/workflows/clang_gcc_linux.yml b/.github/workflows/clang_gcc_linux.yml index a57ed8b35..3d3db48d2 100644 --- a/.github/workflows/clang_gcc_linux.yml +++ b/.github/workflows/clang_gcc_linux.yml @@ -27,7 +27,7 @@ jobs: - name: "Install cmake 3.31.0" uses: lukka/get-cmake@v3.31.0 - name: "Clone the PLSSVM repository into PLSSVM/" - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v6.0.1 with: path: PLSSVM - name: "Install Python dependencies" @@ -37,7 +37,7 @@ jobs: - name: "Configure PLSSVM using CMake" run: | cd PLSSVM - cmake --preset openmp_test -DCMAKE_CXX_COMPILER=${{ matrix.compiler }} -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DPLSSVM_TARGET_PLATFORMS="cpu" -DPLSSVM_ENABLE_LANGUAGE_BINDINGS=ON -DPLSSVM_ENABLE_PERFORMANCE_TRACKING=ON -DPLSSVM_ENABLE_MPI=ON -DPLSSVM_TEST_FILE_NUM_DATA_POINTS=50 -DPLSSVM_TEST_FILE_NUM_FEATURES=20 -DPLSSVM_ENABLE_LTO=OFF + cmake --preset openmp_test 
-DCMAKE_CXX_COMPILER=${{ matrix.compiler }} -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DPLSSVM_ENABLE_STL_DEBUG_MODE=${{ matrix.build_type == 'Debug' && 'ON' || 'OFF' }} -DPLSSVM_TARGET_PLATFORMS="cpu" -DPLSSVM_ENABLE_LANGUAGE_BINDINGS=ON -DPLSSVM_ENABLE_PERFORMANCE_TRACKING=ON -DPLSSVM_ENABLE_MPI=ON -DPLSSVM_TEST_FILE_NUM_DATA_POINTS=50 -DPLSSVM_TEST_FILE_NUM_FEATURES=20 -DPLSSVM_ENABLE_LTO=OFF - name: "Build PLSSVM" run: | cd PLSSVM @@ -48,4 +48,4 @@ jobs: cd PLSSVM mkdir tmp export TMPDIR=$PWD/tmp - ctest --preset openmp_test -C ${{ matrix.build_type }} --parallel 2 \ No newline at end of file + ctest --preset openmp_test -C ${{ matrix.build_type }} --parallel 2 --output-on-failure \ No newline at end of file diff --git a/.github/workflows/clang_macos.yml b/.github/workflows/clang_macos.yml index 6c3269609..7b2e882e4 100644 --- a/.github/workflows/clang_macos.yml +++ b/.github/workflows/clang_macos.yml @@ -7,51 +7,41 @@ on: workflow_dispatch: jobs: macOS-Test: - runs-on: macos-13 + runs-on: macos-15-intel strategy: matrix: build_type: [Debug, Release] steps: - name: "Install cmake 3.31.0" uses: lukka/get-cmake@v3.31.0 - - name: "Install LLVM and Clang" - uses: KyleMayes/install-llvm-action@v2 - with: - version: "15.0" - name: "Install dependencies via brew" run: | - brew install python3 - brew install numpy brew install libomp + - name: "Set up Python" + uses: actions/setup-python@v6.1.0 + with: + python-version: "3.12" - name: "Install Python dependencies" run: | - python3 -m pip install -U pip - pip install argparse scikit-learn humanize --break-system-packages - - name: "Set PYTHONPATH" - run: | - export PYTHONPATH=$(python3 -c "import site; print(site.getsitepackages()[0])") - echo "PYTHONPATH=$PYTHONPATH" >> $GITHUB_ENV + pip install --upgrade pip setuptools wheel + pip install numpy scikit-learn humanize - name: "Clone the PLSSVM repository into PLSSVM/" - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v6.0.1 with: path: PLSSVM - name: "Configure 
PLSSVM using CMake" run: | cd PLSSVM - export LDFLAGS="-L/opt/homebrew/opt/libomp/lib" - export CPPFLAGS="-I/opt/homebrew/opt/libomp/include" - cmake --preset openmp_test -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DPLSSVM_TARGET_PLATFORMS="cpu" -DPLSSVM_ENABLE_LANGUAGE_BINDINGS=ON -DPLSSVM_ENABLE_PERFORMANCE_TRACKING=ON -DPLSSVM_ENABLE_MPI=OFF -DPLSSVM_TEST_FILE_NUM_DATA_POINTS=50 -DPLSSVM_TEST_FILE_NUM_FEATURES=20 -DPLSSVM_ENABLE_LTO=OFF + cmake --preset openmp_test -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DPLSSVM_TARGET_PLATFORMS="cpu" -DPLSSVM_ENABLE_LANGUAGE_BINDINGS=ON -DPLSSVM_ENABLE_PERFORMANCE_TRACKING=ON -DPLSSVM_ENABLE_MPI=OFF -DPLSSVM_TEST_FILE_NUM_DATA_POINTS=50 -DPLSSVM_TEST_FILE_NUM_FEATURES=20 -DPLSSVM_ENABLE_LTO=OFF -DOpenMP_CXX_FLAGS="-Xpreprocessor -fopenmp" -DOpenMP_CXX_LIB_NAMES="omp" -DOpenMP_omp_LIBRARY="/usr/local/opt/libomp/lib/libomp.dylib" -DOpenMP_CXX_INCLUDE_DIRS="/usr/local/opt/libomp/include" - name: "Build PLSSVM" shell: bash run: | cd PLSSVM - export LDFLAGS="-L/opt/homebrew/opt/libomp/lib" - export CPPFLAGS="-I/opt/homebrew/opt/libomp/include" + export CPLUS_INCLUDE_PATH="/usr/local/opt/libomp/include":$CPLUS_INCLUDE_PATH cmake --build --preset openmp_test --config ${{ matrix.build_type }} echo "${GITHUB_WORKSPACE}/PLSSVM/build" >> $GITHUB_PATH - name: "Run tests" run: | cd PLSSVM - export LDFLAGS="-L/opt/homebrew/opt/libomp/lib" - export CPPFLAGS="-I/opt/homebrew/opt/libomp/include" - ctest --preset openmp_test -C ${{ matrix.build_type }} -E ".*executable.*" --parallel 2 + export LD_LIBRARY_PATH="/usr/local/opt/libomp/lib":$LD_LIBRARY_PATH + ctest --preset openmp_test -C ${{ matrix.build_type }} -E ".*Executable.*" --parallel 2 --output-on-failure diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 207971823..9c6dbf115 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -16,7 +16,7 @@ 
jobs: steps: # checkout repository - name: "Checkout PLSSVM" - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v6.0.1 with: path: PLSSVM # install dependencies @@ -25,8 +25,8 @@ jobs: sudo apt update sudo apt-get install -y doxygen graphviz # install new CMake version - - name: "Install cmake 3.31.0" - uses: lukka/get-cmake@v3.31.0 + - name: "Install cmake 4.2.1" + uses: lukka/get-cmake@v4.2.1 # configure project via CMake - name: "Configure" run: | diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index 8632723be..d5ffaa027 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -13,7 +13,7 @@ jobs: steps: # checkout repository - name: "Checkout PLSSVM" - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v6.0.1 with: path: PLSSVM # install dependencies @@ -22,8 +22,8 @@ jobs: sudo apt install libomp-dev clang-format pip install "git+https://github.com/vancraar/cmake_format@master" # install new CMake version - - name: "Install cmake 3.31.0" - uses: lukka/get-cmake@v3.31.0 + - name: "Install cmake 4.2.1" + uses: lukka/get-cmake@v4.2.1 # configure project via CMake - name: "Configure" run: | @@ -47,7 +47,7 @@ jobs: # upload the clang-format git patch, if available - name: "Upload clang-format patch" if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6.0.0 with: name: clang-format-patch path: PLSSVM/clang-format-patch.txt @@ -70,7 +70,7 @@ jobs: # upload the cmake-format git patch, if available - name: "Upload cmake-format patch" if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6.0.0 with: name: cmake-format-patch path: PLSSVM/cmake-format-patch.txt diff --git a/.github/workflows/msvc_windows.yml b/.github/workflows/msvc_windows.yml index 537fb15de..46f1e0016 100644 --- a/.github/workflows/msvc_windows.yml +++ b/.github/workflows/msvc_windows.yml @@ -17,7 +17,7 @@ jobs: - name: "Install cmake 3.31.0" uses: lukka/get-cmake@v3.31.0 - name: "Clone the 
PLSSVM repository into PLSSVM/" - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v6.0.1 with: path: PLSSVM - name: "Install Python dependencies" @@ -27,7 +27,7 @@ jobs: - name: "Configure PLSSVM using CMake" run: | cd PLSSVM - cmake --preset openmp_test -DCMAKE_CONFIGURATION_TYPES=${{ matrix.build_type }} -DPLSSVM_TARGET_PLATFORMS="cpu" -DPLSSVM_ENABLE_LANGUAGE_BINDINGS=ON -DPLSSVM_ENABLE_PERFORMANCE_TRACKING=ON -DPLSSVM_ENABLE_MPI=OFF -DPLSSVM_TEST_FILE_NUM_DATA_POINTS=50 -DPLSSVM_TEST_FILE_NUM_FEATURES=20 + cmake --preset openmp_test -DCMAKE_CONFIGURATION_TYPES=${{ matrix.build_type }} -DPLSSVM_ENABLE_STL_DEBUG_MODE=${{ matrix.build_type == 'Debug' && 'ON' || 'OFF' }} -DPLSSVM_TARGET_PLATFORMS="cpu" -DPLSSVM_ENABLE_LANGUAGE_BINDINGS=ON -DPLSSVM_ENABLE_PERFORMANCE_TRACKING=ON -DPLSSVM_ENABLE_MPI=OFF -DPLSSVM_TEST_FILE_NUM_DATA_POINTS=50 -DPLSSVM_TEST_FILE_NUM_FEATURES=20 - name: "Build PLSSVM" shell: bash run: | @@ -37,4 +37,4 @@ jobs: - name: "Run tests" run: | cd PLSSVM - ctest --preset openmp_test -C ${{ matrix.build_type }} -E ".*executable.*" --parallel 2 + ctest --preset openmp_test -C ${{ matrix.build_type }} -E ".*Executable.*" --parallel 2 --output-on-failure diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml index 7ced58ffa..de092a843 100644 --- a/.github/workflows/pip.yml +++ b/.github/workflows/pip.yml @@ -21,11 +21,11 @@ jobs: run: | sudo apt install libopenmpi-dev - name: "Clone the PLSSVM repository into PLSSVM/" - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v6.0.1 with: path: PLSSVM - name: "Set up Python" - uses: actions/setup-python@v5 + uses: actions/setup-python@v6.1.0 with: python-version: "3.11" - name: "Create and activate virtual environment" diff --git a/.gitignore b/.gitignore index ec61ae3c9..3085db502 100644 --- a/.gitignore +++ b/.gitignore @@ -75,9 +75,10 @@ docs/* !docs/plssvm-scale.1.in # data in test folder -!tests/data/ tests/data/* -!tests/data/model/ -!tests/data/libsvm/ -!tests/data/arff/ 
-!tests/data/predict/ \ No newline at end of file + +# model files +*.model + +# auto-generated AdaptiveCpp directory for JIT related information +.acpp-* diff --git a/CMakeLists.txt b/CMakeLists.txt index 8b5c16f86..a864d8f17 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,6 +13,9 @@ project( DESCRIPTION "A Least Squares Support Vector Machine implementation using different backends." ) +# generate a compile_commands.json file (used by clang-tidy and other tools) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON CACHE INTERNAL "Generate a compile_commands.json file." FORCE) + # include some generally used utility scripts include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/add_coverage_build_type.cmake) include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/utility_macros.cmake) @@ -78,14 +81,15 @@ endif () # set base sources set(PLSSVM_BASE_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/backends/Kokkos/execution_space.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/backends/SYCL/data_parallel_kernels.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/backends/SYCL/implementation_types.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/backends/SYCL/kernel_invocation_types.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/backends/stdpar/implementation_types.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/backends/execution_range.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/data_set/min_max_scaler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/detail/cmd/parser_predict.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/detail/cmd/parser_scale.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/detail/cmd/parser_train.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/detail/cmd/utility.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/detail/io/file_reader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/detail/data_distribution.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/detail/memory_size.cpp @@ -298,7 +302,7 @@ list( ######################################################################################################################## # coverage analysis 
only possible with the Coverage CMAKE_BUILD_TYPE if (uppercase_CMAKE_BUILD_TYPE MATCHES COVERAGE) - list(APPEND CMAKE_MESSAGE_INDENT "coverage: ") + list(APPEND CMAKE_MESSAGE_INDENT "Coverage: ") # must be linux if (WIN32 OR APPLE) message(FATAL_ERROR "Only Linux is supported for the coverage analysis.") @@ -333,7 +337,7 @@ if (uppercase_CMAKE_BUILD_TYPE MATCHES COVERAGE) set(PLSSVM_DEMANGLE_USING_CPPFILT --demangle-cpp) endif () - # Create the coverage target. Run coverage tests with 'ctest --build . --target coverage' + # Create the coverage target. Run coverage tests with 'cmake --build --preset XXX --target coverage'. set(PLSSVM_COVERAGE_REPORT_OUTPUT_DIRECTORY "coverage_report") add_custom_target( coverage @@ -373,11 +377,11 @@ if (uppercase_CMAKE_BUILD_TYPE MATCHES COVERAGE) # add custom target `make clean_coverage` which calls `make clean` and also removes all generate *.gcda and *.gcno files add_custom_target(clean_coverage) add_custom_command( - DEPENDS clean + POST_BUILD TARGET clean_coverage COMMENT "remove all coverage files" COMMAND ${CMAKE_MAKE_PROGRAM} clean COMMAND ${CMAKE_COMMAND} -E remove -f coverage.info coverage_init.info coverage_tests.info ${PLSSVM_COVERAGE_REPORT_OUTPUT_DIRECTORY} - COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/delete_coverage_files.cmake" TARGET clean_coverage + COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/delete_coverage_files.cmake" ) list(POP_BACK CMAKE_MESSAGE_INDENT) endif () @@ -638,6 +642,34 @@ if (PLSSVM_ENABLE_LTO) endif () endif () +######################################################################################################################## +# enable the requested vectorization widths for the auto-vectorizers # +######################################################################################################################## +# GCC and clang both do not automatically auto-vectorize for AVX-512 (only AVX2) +# -> enable it if "cpu:avx512" was passed as 
PLSSVM_TARGET_PLATFORMS +if (PLSSVM_NUM_CPU_TARGET_ARCHS EQUAL 1) + if (${PLSSVM_CPU_TARGET_ARCHS} STREQUAL "avx512") + message(STATUS "Enabling AVX512 support for the auto-vectorizers (-mprefer-vector-width=512).") + target_compile_options( + ${PLSSVM_BASE_LIBRARY_NAME} PUBLIC $<$:$<$:-mprefer-vector-width=512>> + ) + elseif (${PLSSVM_CPU_TARGET_ARCHS} STREQUAL "avx2" OR ${PLSSVM_CPU_TARGET_ARCHS} STREQUAL "avx") + message(STATUS "Enabling AVX/AVX2 support for the auto-vectorizers (-mprefer-vector-width=256).") + target_compile_options( + ${PLSSVM_BASE_LIBRARY_NAME} PUBLIC $<$:$<$:-mprefer-vector-width=256>> + ) + elseif (${PLSSVM_CPU_TARGET_ARCHS} MATCHES "^sse") + message(STATUS "Enabling SSE for the auto-vectorizers (-mprefer-vector-width=128).") + target_compile_options( + ${PLSSVM_BASE_LIBRARY_NAME} PUBLIC $<$:$<$:-mprefer-vector-width=128>> + ) + else () + message(FATAL_ERROR "Unrecognized CPU target architecture \"${PLSSVM_CPU_TARGET_ARCHS}\". Allowed values are: avx512, avx2, avx, sse.") + endif () +else () + # automatically use the "optimal" auto-vectorizer width +endif () + ######################################################################################################################## # check for optional and necessary dependencies # ######################################################################################################################## @@ -646,8 +678,8 @@ include(FetchContent) list(APPEND CMAKE_MESSAGE_INDENT "Dependencies: ") # try finding cxxopts -set(PLSSVM_cxxopts_VERSION v3.2.0) -find_package(cxxopts 3.2.0 QUIET) +set(PLSSVM_cxxopts_VERSION v3.3.1) +find_package(cxxopts 3.3.1 QUIET) if (cxxopts_FOUND) message(STATUS "Found package cxxopts.") target_include_directories(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC ${cxxopts_INCLUDE_DIR}) @@ -675,8 +707,8 @@ endif () # ~~~ set(PLSSVM_FAST_FLOAT_WRAPPER_LIBRARY_NAME plssvm-fast_float-wrapper) add_library(${PLSSVM_FAST_FLOAT_WRAPPER_LIBRARY_NAME} SHARED 
${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/detail/fast_float_wrapper.cpp) -set(PLSSVM_fast_float_VERSION v8.0.2) -find_package(fast_float 8.0.0 QUIET) +set(PLSSVM_fast_float_VERSION v8.1.0) +find_package(fast_float 8.1.0 QUIET) if (fast_float_FOUND) message(STATUS "Found package fast_float.") target_include_directories(${PLSSVM_FAST_FLOAT_WRAPPER_LIBRARY_NAME} PUBLIC ${fast_float_INCLUDE_DIR}) @@ -715,7 +747,7 @@ target_link_libraries(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC ${PLSSVM_FAST_FLOAT_WRA list(APPEND PLSSVM_TARGETS_TO_INSTALL "${PLSSVM_FAST_FLOAT_WRAPPER_LIBRARY_NAME}") # try finding igor -set(PLSSVM_igor_VERSION a5224c60d266974d3f407191583fe266cbe1c93d) +set(PLSSVM_igor_VERSION fef608b458ab2fca736ac8e415e4d4160a48745d) # can't use a newer commit since igor switch to mandatory C++20 find_package(igor QUIET) if (igor_FOUND) message(STATUS "Found package igor.") @@ -725,6 +757,8 @@ else () target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PRIVATE PLSSVM_igor_VERSION="${PLSSVM_igor_VERSION}") # set options for igor set(IGOR_BUILD_TESTS OFF CACHE INTERNAL "" FORCE) + # disable additional compiler flags check (only warning flags) since for some reasons it takes ages with stdpar compilers + set(_YACMACompilerLinkerSettingsIncluded ON CACHE INTERNAL "" FORCE) # fetch named argument library igor FetchContent_Declare( igor @@ -768,6 +802,7 @@ else () ) FetchContent_MakeAvailable(fmt) set_property(TARGET fmt PROPERTY POSITION_INDEPENDENT_CODE ON) + set_target_properties(fmt PROPERTIES CXX_CLANG_TIDY "") target_compile_definitions(fmt PRIVATE FMT_USE_FULL_CACHE_DRAGONBOX) if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le") target_compile_definitions(fmt PUBLIC FMT_USE_FLOAT128=0) @@ -808,6 +843,58 @@ if (PLSSVM_ENABLE_FORMATTING) list(POP_BACK CMAKE_MESSAGE_INDENT) endif () +######################################################################################################################## +# enable clang-tidy static analyzer checks # 
+######################################################################################################################## +option(PLSSVM_ENABLE_CLANG_TIDY "Enable clang-tidy static code analysis." OFF) +if (PLSSVM_ENABLE_CLANG_TIDY) + list(APPEND CMAKE_MESSAGE_INDENT "clang-tidy: ") + message(CHECK_START "Checking for clang-tidy executable") + + # check if the clang-tidy executable is available + find_program(PLSSVM_CLANG_TIDY_EXE clang-tidy) # usage as part of the compilation process (can't analyse headers) + find_program(PLSSVM_RUN_CLANG_TIDY_EXE run-clang-tidy) # usage as standalone target (can analyse headers) + if (NOT PLSSVM_CLANG_TIDY_EXE OR NOT PLSSVM_RUN_CLANG_TIDY_EXE) + # clang-tidy executable not found + message(CHECK_FAIL "not found") + message(SEND_ERROR "Cannot find requested clang-tidy/run-clang-tidy for static analysis!") + else () + # clang-tidy executable found -> enable it + message(CHECK_PASS "found") + + # clang-tidy support during compilation + message(STATUS "Enabling static analysis via clang-tidy during compilation.") + set(CMAKE_CXX_CLANG_TIDY "clang-tidy;-quiet") + + # custom clang-tidy target that also supports headers + message(STATUS "Enabling custom clang-tidy-all target.") + + # create a custom command that filters unwanted flags + add_custom_target( + filter_compile_commands + COMMAND ${CMAKE_COMMAND} -DPLSSVM_COMPILE_COMMANDS_JSON_FILE=${CMAKE_BINARY_DIR}/compile_commands.json -P + ${CMAKE_SOURCE_DIR}/cmake/filter_compile_commands.cmake + COMMENT "Generating a sanitized compile_commands.json" + VERBATIM + ) + + # create the custom target + add_custom_target( + clang-tidy-all COMMAND ${PLSSVM_RUN_CLANG_TIDY_EXE} -p "${CMAKE_BINARY_DIR}" -quiet "${PROJECT_SOURCE_DIR}/src" + "${PROJECT_SOURCE_DIR}/bindings/Python" "${PROJECT_SOURCE_DIR}/tests" DEPENDS filter_compile_commands + ) + + # clang-tidy is not supported by all backends + message( + WARNING + "Since some backends use special compilers with custom compiler flags that are NOT 
understood by clang-tidy, the target may fail to correctly build! " + "Some precautions are taken to make the clang-tidy-all target more compatible with these special compilers, but it does not work in all cases!" + ) + endif () + + list(POP_BACK CMAKE_MESSAGE_INDENT) +endif () + ######################################################################################################################## # enable documentation generation via doxygen # ######################################################################################################################## @@ -914,16 +1001,16 @@ if (TARGET ${PLSSVM_SYCL_BACKEND_LIBRARY_NAME}) choose the SYCL implementation to be used in the SYCL backend: ${PLSSVM_SYCL_BACKEND_NAME_LIST} (default: automatic) " ) - string(REPLACE ";" "|" PLSSVM_SYCL_KERNEL_INVOCATION_TYPE_NAME_LIST "${PLSSVM_SYCL_KERNEL_INVOCATION_TYPE_NAME_LIST}") - set(PLSSVM_SYCL_KERNEL_INVOCATION_TYPE_MANPAGE_ENTRY + string(REPLACE ";" "|" PLSSVM_SYCL_DATA_PARALLEL_KERNEL_NAME_LIST "${PLSSVM_SYCL_DATA_PARALLEL_KERNEL_NAME_LIST}") + set(PLSSVM_SYCL_DATA_PARALLEL_KERNEL_MANPAGE_ENTRY " .TP -.B --sycl_kernel_invocation_type -choose the kernel invocation type when using SYCL as backend: ${PLSSVM_SYCL_KERNEL_INVOCATION_TYPE_NAME_LIST} (default: automatic) +.B --sycl_data_parallel_kernel +choose the data parallel kernel when using SYCL as backend: ${PLSSVM_SYCL_DATA_PARALLEL_KERNEL_NAME_LIST} (default: automatic) " ) endif () -set(PLSSVM_SYCL_MANPAGE_ENTRY "${PLSSVM_SYCL_KERNEL_INVOCATION_TYPE_MANPAGE_ENTRY}${PLSSVM_SYCL_IMPLEMENTATION_TYPE_MANPAGE_ENTRY}") +set(PLSSVM_SYCL_MANPAGE_ENTRY "${PLSSVM_SYCL_DATA_PARALLEL_KERNEL_MANPAGE_ENTRY}${PLSSVM_SYCL_IMPLEMENTATION_TYPE_MANPAGE_ENTRY}") # assemble the Kokkos manpage entry if (TARGET ${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME}) string(REPLACE ";" "|" PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "${PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES}") diff --git a/README.md b/README.md index c764ed4d1..e6933d846 
100644 --- a/README.md +++ b/README.md @@ -31,38 +31,41 @@ A [Support Vector Machine (SVM)](https://en.wikipedia.org/wiki/Support-vector_machine) is a supervised machine learning model. In its basic form SVMs are used for binary classification tasks. Their fundamental idea is to learn a hyperplane which separates the two classes best, i.e., where the widest possible margin around its decision boundary is free of data. -This is also the reason, why SVMs are also called "large margin classifiers". +This is also the reason why SVMs are also called "large margin classifiers." To predict to which class a new, unseen data point belongs, the SVM simply has to calculate on which side of the previously calculated hyperplane the data point lies. -This is very efficient since it only involves a single scalar product of the size corresponding to the numer of features of the data set. +This is very efficient since it only involves a single scalar product of the size corresponding to the number of features per data point in the data set.

- Basic idea of an Support Vector Machine as classification model. + Basic idea of an Support Vector Machine as classification model.

-However, normal SVMs suffer in their potential parallelizability. +However, normal SVMs suffer from their potential parallelizability. Determining the hyperplane boils down to solving a convex quadratic problem. For this, most SVM implementations use Sequential Minimal Optimization (SMO), an inherently sequential algorithm. The basic idea of this algorithm is that it takes a pair of data points and calculates the hyperplane between them. Afterward, two new data points are selected and the existing hyperplane is adjusted accordingly. -This procedure is repeat until a new adjustment would be smaller than some epsilon greater than zero. +This procedure is repeated until a new adjustment would be smaller than some epsilon greater than zero. Some SVM implementations try to harness some parallelization potential by not drawing point pairs but group of points. In this case, the hyperplane calculation inside this group is parallelized. -However, even then modern highly parallel hardware can not be utilized efficiently. +However, even then, modern highly parallel hardware cannot be utilized efficiently. Therefore, we implemented a version of the original proposed SVM called [Least Squares Support Vector Machine (LS-SVM)](https://en.wikipedia.org/wiki/Least-squares_support-vector_machine). The LS-SVMs reformulated the original problem such that it boils down to solving a system of linear equations. -For this kind of problem many highly parallel algorithms and implementations are known. +For this kind of problem, many highly parallel algorithms and implementations are known. We decided to use the [Conjugate Gradient (CG)](https://en.wikipedia.org/wiki/Conjugate_gradient_method) to solve the system of linear equations. The main highlights of our SVM implementations are: 1. Drop-in replacement for LIBSVM's `svm-train`, `svm-predict`, and `svm-scale` (some features currently not implemented). -2. 
Support of multiple different programming frameworks for parallelization (also called backends in our PLSSVM implementation) which allows us to target GPUs and CPUs from different vendors like NVIDIA, AMD, or Intel: +2. Support for multiple different programming frameworks for parallelization (also called backends in our PLSSVM implementation) which allows us to support GPUs and CPUs from different vendors like NVIDIA, AMD, or Intel: - [OpenMP](https://www.openmp.org/) - [HPX](https://hpx.stellar-group.org/) (tested with current master) - C++ 17's standard parallelism [stdpar](https://en.cppreference.com/w/cpp/algorithm):
**Note**: due to the nature of the used USM mechanics in the `stdpar` implementations, the `stdpar` backend **can't** be enabled together with **any** other backend!
- **Note**: since every translation units need to be compiled with the same flag, we currently globally set `CMAKE_CXX_FLAGS` although it's discouraged. + **Note**: since every translation unit needs to be compiled with the same flag, we currently globally set `CMAKE_CXX_FLAGS` although it's discouraged. - [nvc++](https://developer.nvidia.com/hpc-sdk) from NVIDIA's HPC SDK (tested with version [25.3](https://docs.nvidia.com/hpc-sdk/hpc-sdk-release-notes/index.html)) - [roc-stdpar](https://github.com/ROCm/roc-stdpar) merged into upstream LLVM starting with version 18 (tested with version [18](https://releases.llvm.org/)) - [icpx](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compiler.html) as Intel's oneAPI compiler (tested with version [2025.0.0](https://www.intel.com/content/www/us/en/developer/articles/release-notes/oneapi-dpcpp/2025.html)) @@ -83,7 +86,7 @@ The main highlights of our SVM implementations are: - laplacian: $\exp(-\gamma$ $\cdot |$ $\vec{u}$ $-$ $\vec{v}$ $|_1)$ - chi-squared (only well-defined for values > 0): $\exp(-\gamma \cdot \sum_i \frac{(x[i] - y[i])^2}{x[i] + y[i]})$ 4. Two different solver types for a trade-off between memory footprint and runtime: - - `cg_explicit`: large memory overhead but very fast + - `cg_explicit`: large memory overhead but fast - `cg_implicit`: slower but requires drastically less memory 5. Multi-class classification available via one vs. all (also one vs. rest or OAA) and one vs. one (also OAO): - OAA: one huge classification task where our CG algorithm solves a system of linear equations with multiple right-hand sides. The resulting model file is **not** compatible with LIBSVM. @@ -104,13 +107,13 @@ General dependencies: - a C++17 capable compiler (e.g. 
[`gcc`](https://gcc.gnu.org/) or [`clang`](https://clang.llvm.org/)) - [CMake](https://cmake.org/) 3.25 or newer -- [cxxopts ≥ v3.2.0](https://github.com/jarro2783/cxxopts), [fast_float ≥ v8.0.2](https://github.com/fastfloat/fast_float), [{fmt} ≥ v11.0.2](https://github.com/fmtlib/fmt), and [igor](https://github.com/bluescarni/igor) (all four are automatically build during the CMake configuration if they couldn't be found using the respective `find_package` call) +- [cxxopts ≥ v3.3.1](https://github.com/jarro2783/cxxopts), [fast_float ≥ v8.1.0](https://github.com/fastfloat/fast_float), [{fmt} ≥ v12.0.0](https://github.com/fmtlib/fmt), and [igor](https://github.com/bluescarni/igor) (all four are automatically build during the CMake configuration if they couldn't be found using the respective `find_package` call) - [GoogleTest ≥ v1.16.0](https://github.com/google/googletest) if testing is enabled (automatically build during the CMake configuration if `find_package(GTest)` wasn't successful) - [doxygen](https://www.doxygen.nl/index.html) if documentation generation is enabled -- [Pybind11 ≥ v2.13.6](https://github.com/pybind/pybind11) if Python bindings are enabled +- [Pybind11 ≥ v3.0.1](https://github.com/pybind/pybind11) if Python bindings are enabled - [OpenMP](https://www.openmp.org/) 4.0 or newer (optional) to speed-up library utilities (like file parsing) - [MPI](https://www.mpi-forum.org/) if distributed memory systems should be supported; [mpi4py](https://mpi4py.readthedocs.io/en/stable/) to enable interoperability in our Python bindings -- [Format.cmake](https://github.com/TheLartians/Format.cmake) if auto formatting via cmake-format and clang-format is enabled; also requires at least clang-format-18 and git, additionally, needs our custom [cmake-format fork](https://github.com/vancraar/cmake_format) incorporating some patches +- [Format.cmake](https://github.com/TheLartians/Format.cmake) if auto formatting via cmake-format and clang-format is enabled; it 
also requires at least clang-format-18 and git, additionally, needs our custom [cmake-format fork](https://github.com/vancraar/cmake_format) incorporating some patches - multiple Python modules used in the utility scripts, to install all modules use `pip install --user -r install/python_requirements.txt` Additional dependencies for the OpenMP backend: @@ -297,8 +300,15 @@ The `[optional_options]` can be one or multiple of: - `PLSSVM_ENABLE_FAST_MATH=ON|OFF` (default depending on `CMAKE_BUILD_TYPE`: `ON` for Release or RelWithDebInfo, `OFF` otherwise): enable `fast-math` compiler flags for all backends - `PLSSVM_ENABLE_ASSERTS=ON|OFF` (default: `OFF`): enables custom assertions - `PLSSVM_USE_FLOAT_AS_REAL_TYPE=ON|OFF` (default: `OFF`): use `float` as real_type instead of `double` -- `PLSSVM_THREAD_BLOCK_SIZE` (default: `8`): set a specific thread block size used in the GPU kernels (for fine-tuning optimizations) -- `PLSSVM_INTERNAL_BLOCK_SIZE` (default: `4`): set a specific internal block size used in the GPU kernels (for fine-tuning optimizations) +- `PLSSVM_THREAD_BLOCK_SIZE` (default: `8`): set a specific thread block size used in the kernels (for fine-tuning optimizations)
+ **Note**: for the different execution spaces in the Kokkos backend, the maximum value of the `PLSSVM_THREAD_BLOCK_SIZE` is not as straightforward as one may wish: + - CUDA, HIP, and SYCL: the maximum value depends on the underlying backend (in practice $\sqrt{1024}$ = 32) + - HPX and Serial: must **exactly** be 1 + - OpenMP: must be 1 or 2 (most likely only 1 will work) + - Threads: must be 1; however, note that Kokkos itself **must** be built with hwloc support (via `-DKokkos_ENABLE_HWLOC=ON`), otherwise the Kokkos::Threads execution space will always only use a single core + - OpenMPTarget: $\sqrt{256}$ = 16 + - OpenACC: $\lfloor\sqrt{512}\rfloor$ = 22 +- `PLSSVM_INTERNAL_BLOCK_SIZE` (default: `4`): set a specific internal block size used in the kernels (for fine-tuning optimizations) - `PLSSVM_ENABLE_LTO=ON|OFF` (default: `OFF`): enable interprocedural optimization (IPO/LTO) if supported by the compiler - `PLSSVM_ENFORCE_MAX_MEM_ALLOC_SIZE=ON|OFF` (default: `ON`): enforce the maximum (device) memory allocation size for the plssvm::solver_type::automatic solver - `PLSSVM_ENABLE_PINNED_MEMORY=ON|OFF` (default: `OFF`): use host pinned memory for the input matrix when assembling the kernel matrix, if available @@ -347,7 +357,7 @@ If the SYCL backend is available, additional options can be set. - `AUTO`: check for DPC++/icpx as implementation for the SYCL backend but **do not** fail if not available - `OFF`: do not check for DPC++/icpx as implementation for the SYCL backend -- `PLSSVM_ENABLE_SYCL_HIERARCHICAL_AND_SCOPED_KERNELS` (default: `ON`): enable SYCL's `hierarchical` and AdaptiveCpp's `scoped` kernel invocation types +- `PLSSVM_ENABLE_SYCL_HIERARCHICAL_AND_SCOPED_KERNELS` (default: `ON`): enable SYCL's `hierarchical` data parallel kernel and AdaptiveCpp's `scoped` parallelism To use DPC++/icpx for SYCL, simply set the `CMAKE_CXX_COMPILER` to the respective DPC++/icpx clang executable during CMake invocation. 
@@ -355,7 +365,7 @@ If the SYCL implementation is DPC++/icpx the following additional options are av - `PLSSVM_SYCL_BACKEND_DPCPP_USE_LEVEL_ZERO` (default: `ON`): use DPC++/icpx's Level-Zero backend instead of its OpenCL backend **(only available if a CPU or Intel GPU is targeted)** -If the SYCL implementation is AdaptiveCpp the following additional option is available: +If the SYCL implementation is AdaptiveCpp, the following additional option is available: - `PLSSVM_SYCL_BACKEND_ADAPTIVECPP_USE_GENERIC_SSCP` (default: `ON`): use AdaptiveCpp's new SSCP compilation flow @@ -366,7 +376,7 @@ If more than one SYCL implementation is available the environment variables `PLS If the Kokkos backend is available, an additional option can be set. -- `PLSSVM_KOKKOS_BACKEND_SYCL_ENABLE_MULTI_GPU` (default: `OFF`): enable multi-GPU support for the Kokkos::SYCL execution space; broken in Kokkos as of version 4.6.00! +- `PLSSVM_KOKKOS_BACKEND_SYCL_ENABLE_MULTI_GPU` (default: `OFF`): enable multi-GPU support for the Kokkos::SYCL execution space; broken in Kokkos as of version 4.6.01! If the stdpar backend is available, an additional option can be set. @@ -455,6 +465,19 @@ However, these additional options can be enabled using normal CMake options. **Note**: the only difference between the dpcpp and icpx presets is the automatically set `CMAKE_CXX_COMPILER`. Internally, both presets use the same SYCL implementation. +#### HPX and including the hpx_main.hpp header + +HPX defines some command line options to change its runtime behavior, e.g., `--hpx:threads`. +However, it also defines some shortcuts for these command line options like `-t`. +The problem is that these shortcut command line options are likely to collide with other command line options. +In the example above the HPX `-t` option to set the number of used threads collides with PLSSVM's `-t` option to determine the kernel function leading to unwanted behavior. 
+The only way to disable HPX's shortcut command line options is by including `"hpx/hpx_main.hpp"` which is automatically done in our `"plssvm/environment.hpp"` header. +However, the [HPX documentation](https://hpx-docs.stellar-group.org/latest/html/manual/starting_the_hpx_runtime.html) states that this header should only be included in the main executable or otherwise linker errors will occur. +If the `"plssvm/environment.hpp"` header is included in another file, like in our case the custom GoogleTest main implementation, the `"hpx/hpx_main.hpp"` should not be included. +This can be achieved by specifying `PLSSVM_HPX_DO_NOT_INCLUDE_HPX_MAIN` before including our header. + +All of this also applies to the Kokkos::HPX execution space. + ### Running the Tests To run the tests after building the library (with `PLSSVM_ENABLE_TESTING` set to `ON`) use: @@ -497,7 +520,7 @@ Our `cmake-format` can be installed via: pip install "git+https://github.com/vancraar/cmake_format@master" ``` -To check whether formatting changes must be applied use: +To check whether formatting changes must be applied, one can use: ```bash cmake --build . --target check-cmake-format @@ -519,7 +542,7 @@ If doxygen is installed and `PLSSVM_ENABLE_DOCUMENTATION` is set to `ON` the doc cmake --build . -- doc ``` -The documentation of the current state of the main branch can be found [here](https://sc-sgs.github.io/PLSSVM/). +The documentation of the current main branch can be found [here](https://sc-sgs.github.io/PLSSVM/). ### Installing @@ -528,7 +551,7 @@ The documentation of the current state of the main branch can be found [here](ht The library supports the `install` target: ```bash -cmake --build . -- install +cmake --build . 
--target install ``` Afterward, the necessary exports should be performed: @@ -550,13 +573,13 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${CMAKE_INSTALL_PREFIX}/lib64:${PY #### Install via pip -We also support a pip packages that can be used to install our library: +We also support a pip package that can be used to install our library: ```bash pip install plssvm ``` -This pip install behaves **as if** the CMake `all_python` preset is used. +This pip installation behaves **as if** the CMake `all_python` preset is used. This means that the `PLSSVM_TARGET_PLATFORMS` are automatically determined and PLSSVM is build with all supported backends that available on the target machine at the point of the `pip install plssvm` invocation. To check the installation, including, e.g., the installed backends, we provide the `plssvm-install-check` command after @@ -588,13 +611,17 @@ Issues: https://github.com/SC-SGS/PLSSVM/issues PLSSVM provides three executables: `plssvm-train`, `plssvm-predict`, and `plssvm-scale`. In addition, PLSSVM can also be used as a library in third-party code. -For more information, see the respective `man` pages which are installed via `cmake --build . -- install`. +For more information, see the respective `man` pages which are installed via `cmake --build . -- install`. + +We support the command line options of the third-party libraries [HPX](https://hpx.stellar-group.org/) and [Kokkos](https://github.com/kokkos/kokkos) +by forwarding the command line options to the respective initialization functions. +Internally, these options are filtered out before they are passed to our command line parser utility. ### Generating Artificial Data The repository comes with a Python3 script (in the `utility_scripts/` directory) to simply generate arbitrarily large classification and regression data sets. 
-In order to use all functionality, the following Python3 modules must be installed: +To use all functionality, the following Python3 modules must be installed: [`argparse`](https://docs.python.org/3/library/argparse.html), [`timeit`](https://docs.python.org/3/library/timeit.html), [`numpy`](https://pypi.org/project/numpy/), [`pandas`](https://pypi.org/project/pandas/), [`sklearn`](https://scikit-learn.org/stable/), [`arff`](https://pypi.org/project/arff/), @@ -643,7 +670,7 @@ optional arguments: ``` -An example invocation generating a classification data set consisting of blobs with 1000 data points with 200 features each and +An example invocation generating a classification data set consisting of blobs with 1000 data points with 200 features and 4 classes could look like: ```bash @@ -656,7 +683,7 @@ An example invocation generating a linear regression data set consisting of 1000 python3 generate_data.py --output data_file --format libsvm --problem linear --samples 1000 --features 200 regression ``` -### Training using `plssvm-train` +### Training using plssvm-train ```bash ./plssvm-train --help @@ -686,8 +713,8 @@ Usage: -a, --classification arg the classification strategy to use for multi-class classification: oaa|oao (default: oaa) -b, --backend arg choose the backend: automatic|openmp|hpx|cuda|hip|opencl|sycl|kokkos|stdpar (default: automatic) -p, --target_platform arg choose the target platform: automatic|cpu|gpu_nvidia|gpu_amd|gpu_intel (default: automatic) - --sycl_kernel_invocation_type arg - choose the kernel invocation type when using SYCL as backend: automatic|basic|work_group|hierarchical|scoped (default: automatic) + --sycl_data_parallel_kernel arg + choose the data parallel kernel when using SYCL as backend: automatic|basic|work_group|hierarchical|scoped (default: automatic) --sycl_implementation_type arg choose the SYCL implementation to be used in the SYCL backend: automatic|dpcpp|adaptivecpp (default: automatic) --kokkos_execution_space arg @@ 
-709,7 +736,7 @@ Usage: The help message only print options available based on the CMake invocation. For example, if CUDA was not available during the build step, it will not show up as possible backend in the description of the `--backend` option. -The most minimal example invocation is: +The most minimal example of an invocation is: ```bash ./plssvm-train /path/to/data_file @@ -734,7 +761,7 @@ The `--backend=automatic` option works as follows: - otherwise, if the `gpu_intel` target is available, check for existing backends in order `sycl` 🠦 `opencl` 🠦 `kokkos` 🠦 `stdpar` - otherwise, if the `cpu` target is available, check for existing backends in order `sycl` 🠦 `kokkos` 🠦 `opencl` 🠦 `openmp` 🠦 `hpx` 🠦 `stdpar` -Note that during CMake configuration it is guaranteed that at least one of the above combinations does exist. +Note that during CMake configuration, it is guaranteed that at least one of the above combinations does exist. The `--target_platform=automatic` option works for the different backends as follows: @@ -747,18 +774,19 @@ The `--target_platform=automatic` option works for the different backends as fol - `Kokkos`: checks which execution spaces are available and which target platforms they support and then tries to find available devices in the following order: NVIDIA GPUs 🠦 AMD GPUs 🠦 Intel GPUs 🠦 CPU - `stdpar`: target device must be selected at compile time (using `PLSSVM_TARGET_PLATFORMS`) or using environment variables at runtime -The `--sycl_kernel_invocation_type` and `--sycl_implementation_type` flags are only used if the `--backend` is `sycl`, otherwise a warning is emitted on `stderr`. -If the `--sycl_kernel_invocation_type` is `automatic`, the `work_group` invocation type is currently always used. +The `--sycl_data_parallel_kernel` and `--sycl_implementation_type` flags are only used if the `--backend` is `sycl`, otherwise a warning is emitted on `stderr`. 
+If the `--sycl_data_parallel_kernel` is `automatic`, the `work_group` data parallel kernels are currently always used. If the `--sycl_implementation_type` is `automatic`, the used SYCL implementation is determined by the `PLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION` CMake flag. If the `--kokkos_execution_space` is `automatic`, uses the best fitting execution space based on the provided and/or available target platforms. -### Predicting using `plssvm-predict` +### Predicting using plssvm-predict -Our predict utility is fully conform to LIBSVM's model files. +Our `plssvm-predict` utility is fully conforming to LIBSVM's model files. This means that our `plssvm-predict` can be used on model files learned with, e.g., LIBSVM's `svm-train`. Note: this is not the case for the regression task since the `svm_type` filed mismatch between LIBSVM (`epsilon_svr`) -and PLSSVM (`c_svr`). To automatically convert between the two, simply use the `convert_model.py` script -(in the `utility_scripts/` directory) which simply replaces these fields with the respectively expected one +and PLSSVM (`c_svr`). 
+To automatically convert between the two, the `convert_model.py` script (in the `utility_scripts/` directory) +can be used which simply replaces these fields with the respectively expected one (note that for large files doing that manually may be faster): ```bash @@ -796,8 +824,8 @@ Usage: -b, --backend arg choose the backend: automatic|openmp|hpx|cuda|hip|opencl|sycl|kokkos|stdpar (default: automatic) -p, --target_platform arg choose the target platform: automatic|cpu|gpu_nvidia|gpu_amd|gpu_intel (default: automatic) - --sycl_kernel_invocation_type arg - choose the kernel invocation type when using SYCL as backend: automatic|basic|work_group|hierarchical|scoped (default: automatic) + --sycl_data_parallel_kernel arg + choose the data parallel kernel when using SYCL as backend: automatic|basic|work_group|hierarchical|scoped (default: automatic) --sycl_implementation_type arg choose the SYCL implementation to be used in the SYCL backend: automatic|dpcpp|adaptivecpp (default: automatic) --kokkos_execution_space arg @@ -830,7 +858,7 @@ Another example targeting NVIDIA GPUs using the SYCL backend looks like: The `--target_platform=automatic` and `--sycl_implementation_type` flags work like in the training (`./plssvm-train`) case. -### Data Scaling using `plssvm-scale` +### Data Scaling using plssvm-scale ```bash ./plssvm-scale --help @@ -872,7 +900,7 @@ An example invocation to scale a train and test file in the same way looks like: ### Distributed Memory Support via MPI We support distributed memory via MPI for `plssvm-train` and `plssvm-predict` while simultaneously allowing multiple devices per MPI rank. -In order to use it, MPI must be found during the CMake configuration step. +To use MPI, it must be found during the CMake configuration step. Note that if MPI couldn't be found, PLSSVM still works in shared memory mode only and internally disables all MPI related functionality. 
For example, to run PLSSVM via MPI on four nodes simply use the normal `mpirun` command: @@ -892,17 +920,33 @@ Note that the number of provided load balancing weights must be equal to the use If one MPI rank has more than one device, all these devices on one MPI rank compute the same number of matrix elements. Our MPI implementation, however, currently has some limitations: -- the training, test, and model data is fully read by **every** MPI rank -- the training, test, and model data is fully stored on **each** compute device on **every** MPI rank +- **every** MPI rank fully reads the training, test, and model data +- **each** compute device on **every** MPI rank fully stores the training, test, and model data - **only** the kernel matrix is really divided across **all** MPI ranks - while the expensive BLAS level 3 operations in the CG algorithm are computed in a distributed way, everything else is computed on **every** MPI rank -- in the CG algorithm we communicate the whole matrix, although it would be sufficient to communicate only matrix parts +- in the CG algorithm we communicate the whole matrix, although it would be enough to communicate only matrix parts - **only** the **main** MPI rank (per default rank 0) writes the output files - `plssvm-scale` **does not** support more than one MPI rank +### Device Filtering + +Since PLSSVM supports devices from different target platforms as well as multiple devices from a single target platform, +it may be necessary to filter them at runtime to, e.g., select only a single GPU in a multi-GPU setup. +In general, this device filtering is done using environment variables. 
+However, the exact environment variable and syntax depend on the used backend: + +- `CUDA`: use [`CUDA_VISIBLE_DEVICES`](https://docs.nvidia.com/deploy/topics/topic_5_2_1.html) (e.g., `CUDA_VISIBLE_DEVICES=0,2`) +- `HIP`: use [`HIP_VISIBLE_DEVICES`](https://rocm.docs.amd.com/en/latest/conceptual/gpu-isolation.html#hip-visible-devices) (e.g., `HIP_VISIBLE_DEVICES=0,2`) +- `OpenCL`: use the PLSSVM specific `PLSSVM_OPENCL_DEVICE_FILTER` variable (e.g., `PLSSVM_OPENCL_DEVICE_FILTER=gpu_nvidia:0;gpu_nvidia:2`) with the syntax: `target_platform:device_id;...`; +alternatively, in many cases the respective vendor mechanism can also be used +- `SYCL` using DPC++/icpx: use [`ONEAPI_DEVICE_SELECTOR`](https://intel.github.io/llvm/EnvironmentVariables.html#oneapi-device-selector) (e.g., `ONEAPI_DEVICE_SELECTOR=cuda:0,2`) +- `SYCL` using AdaptiveCpp: use [`ACPP_VISIBILITY_MASK`](https://github.com/AdaptiveCpp/AdaptiveCpp/blob/develop/doc/env_variables.md) for a broader backend level selector and the respective vendor specific environment variables for a more fine-grained selection mechanism +- `Kokkos`: use the mechanism from the respective execution space +- `stdpar`: use the mechanism from the respective implementation + ### Example Code for PLSSVM Used as a Library -A simple C++ program (`main_classification.cpp`) using PLSSVM as library for classification could look like: +A simple C++ program (`main_classification.cpp`) using PLSSVM as a library for classification could look like: ```cpp #include "plssvm/core.hpp" @@ -940,7 +984,7 @@ int main() { const std::vector &correct_label = test_data.labels().value(); std::cout << plssvm::classification_report{ correct_label, predicted_label } << std::endl; - // write model file to disk + // write the model file to disk model.save("model_file.libsvm"); } catch (const plssvm::exception &e) { std::cerr << e.what_with_loc() << std::endl; @@ -952,7 +996,7 @@ int main() { } ``` -A simple C++ program (`main_regression.cpp`) using PLSSVM 
as library for regression could look like: +A simple C++ program (`main_regression.cpp`) using PLSSVM as a library for regression could look like: ```cpp #include "plssvm/core.hpp" @@ -990,7 +1034,7 @@ int main() { const std::vector &correct_values = test_data.labels().value(); std::cout << plssvm::regression_report{ correct_label, predicted_label } << std::endl; - // write model file to disk + // write the model file to disk model.save("model_file.libsvm"); } catch (const plssvm::exception &e) { std::cerr << e.what_with_loc() << std::endl; @@ -1036,7 +1080,7 @@ endforeach () The `examples/python` directory contains the same examples using our PLSSVM Python bindings. Additionally, it contains Python examples leveraging MPI to target distributed memory systems. -### Example Using the `sklearn` like Python Bindings Available For PLSSVM +### Example Using the sklearn like Python Bindings Available For PLSSVM A classification example using PLSSVM's `SVC` Python binding and sklearn's breast cancer data set: @@ -1083,13 +1127,13 @@ sklearn.inspection.DecisionBoundaryDisplay.from_estimator( ) # scatter plot the decision boundary -viridis = plt.cm.get_cmap('viridis', len(np.unique(y))) +viridis = plt.get_cmap('viridis', len(np.unique(y))) plt.scatter(X[:, 0], X[:, 1], cmap=viridis, c=y, s=20, edgecolors="k") -# generate legend handles and add handle +# generate legend handles legend_handles = [plt.scatter([], [], color=viridis(color), label=f'{label}') for label, color in zip(y_label, np.unique(y))] plt.legend(handles=legend_handles) @@ -1111,7 +1155,10 @@ weighted avg 0.91 0.91 0.91 569 Score: 91.39% ```

- Example classification task breast cancer decision boundary output. + Example classification task breast cancer decision boundary output.

A regression example comparing PLSSVM's `SVR` Python binding and `sklearn.SVR` using a sine curve: @@ -1178,14 +1225,17 @@ plt.show() ``` with an example output:

- Example regression output using a sine curve. + Example regression output using a sine curve.

Note that currently not all sklearn `SVC` and `SVR` functionality has been implemented in PLSSVM. The respective functions will throw a Python `AttributeError` if called. For a detailed overview of the functions that are currently implemented, see [our API documentation](bindings/Python/README.md). -There are more examples located in the `examples/python/sklearn` directory that are copied from the sklearn repository and slightly changed for PLSSVM. +There are more examples located in the [examples/python/sklearn](examples/python/sklearn/README.md) directory that are copied from the sklearn repository and slightly changed for PLSSVM. ## Citing PLSSVM diff --git a/bindings/Python/.clang-tidy b/bindings/Python/.clang-tidy new file mode 100644 index 000000000..cdf2ade2d --- /dev/null +++ b/bindings/Python/.clang-tidy @@ -0,0 +1,6 @@ +--- +Checks: ' +-misc-include-cleaner, +' + +InheritParentConfig: true \ No newline at end of file diff --git a/bindings/Python/CMakeLists.txt b/bindings/Python/CMakeLists.txt index 19e8777c1..4d41f7fcf 100644 --- a/bindings/Python/CMakeLists.txt +++ b/bindings/Python/CMakeLists.txt @@ -10,9 +10,13 @@ message(STATUS "Building Python language bindings for PLSSVM.") find_package(Python COMPONENTS Interpreter Development) +# ~~~ +# Note: Even with newer Pybind11 version, we cannot really use py::native_enum since it DOES NOT support string -> C++ enum +# conversions and I couldn't find a solution to make it work without worsening the public API. 
+# ~~~ # try finding pybind11 -set(PLSSVM_pybind11_VERSION v2.13.6) -find_package(pybind11 2.13.6 QUIET) +set(PLSSVM_pybind11_VERSION v3.0.1) +find_package(pybind11 3.0.1 QUIET) if (pybind11_FOUND) message(STATUS "Found package pybind11.") else () @@ -39,6 +43,7 @@ set(PLSSVM_PYTHON_BINDINGS_SOURCES ${CMAKE_CURRENT_LIST_DIR}/model/regression_model.cpp ${CMAKE_CURRENT_LIST_DIR}/sklearn_like/svc.cpp ${CMAKE_CURRENT_LIST_DIR}/sklearn_like/svr.cpp + ${CMAKE_CURRENT_LIST_DIR}/sklearn_like/tags.cpp ${CMAKE_CURRENT_LIST_DIR}/svm/csvm.cpp ${CMAKE_CURRENT_LIST_DIR}/svm/csvc.cpp ${CMAKE_CURRENT_LIST_DIR}/svm/csvr.cpp @@ -73,7 +78,7 @@ if (TARGET ${PLSSVM_HPX_BACKEND_LIBRARY_NAME}) endif () if (TARGET ${PLSSVM_STDPAR_BACKEND_LIBRARY_NAME}) - # AdaptiveCpp stdpar only support on the CPU when using our Python bindings + # AdaptiveCpp stdpar only support the CPU as target in our Python bindings if (PLSSVM_STDPAR_BACKEND STREQUAL "ACPP" AND (DEFINED PLSSVM_NVIDIA_TARGET_ARCHS OR DEFINED PLSSVM_AMD_TARGET_ARCHS OR DEFINED PLSSVM_INTEL_TARGET_ARCHS)) message( FATAL_ERROR @@ -116,6 +121,7 @@ target_compile_definitions(${PLSSVM_PYTHON_BINDINGS_LIBRARY_NAME} PRIVATE PYBIND # disable clang compiler warning target_compile_options(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC $<$:-Wno-self-assign-overloaded>) +target_compile_options(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC $<$:-Wno-null-dereference>) target_compile_options(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC -fPIC) include(GNUInstallDirs) diff --git a/bindings/Python/README.md b/bindings/Python/README.md index 504d2533b..d6786bd5a 100644 --- a/bindings/Python/README.md +++ b/bindings/Python/README.md @@ -1,3 +1,5 @@ + + # The Python3 Bindings - [Sklearn like API for sklearn.svm.SVC](#sklearn-like-api-for-sklearnsvmsvc) @@ -28,10 +30,9 @@ of [`sklearn.svm.SVC`](https://scikit-learn.org/stable/modules/generated/sklearn `sklearn.svm.SVR`](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html) and one extremely closely to our C++ API. 
-**Note**: this page is solely meant as an API reference and overview. For examples see the -top-level [`../../examples/`](/examples) folder. +**Note**: this page is solely meant as an API reference and overview. For examples see the examples folder. -## Sklearn like API for `sklearn.svm.SVC` +## Sklearn like API for sklearn.svm.SVC The following tables show the API provided by [`sklearn.svm.SVC`](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html) and whether we currently @@ -187,7 +188,7 @@ More detailed description of the class methods: - Returns: - `self : object`: The updated object. -## Sklearn like API for `sklearn.svm.SVR` +## Sklearn like API for sklearn.svm.SVR The following tables show the API provided by [`sklearn.svm.SVR`](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html) and whether we currently @@ -332,10 +333,10 @@ The following table lists all PLSSVM enumerations exposed on the Python side: If a SYCL implementation is available, additional enumerations are available: -| enumeration | values | description | -|------------------------|--------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `ImplementationType` | `AUTOMATIC`, `DPCPP`, `ADAPTIVECPP` | The different supported SYCL implementation types (default: `AUTOMATIC`). If `AUTOMATIC` is provided, determines the used SYCL implementation based on the value of `-DPLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION` provided during PLSSVM'S build step. | -| `KernelInvocationType` | `AUTOMATIC`, `BASIC`, `WORK_GROUP`, `HIERARCHICAL`, `SCOPED` | The different supported SYCL kernel invocation types (default: `AUTOMATIC`). If `AUTOMATIC` is provided, simply uses `WORK_GROUP`. 
| +| enumeration | values | description | +|----------------------|--------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `ImplementationType` | `AUTOMATIC`, `DPCPP`, `ADAPTIVECPP` | The different supported SYCL implementation types (default: `AUTOMATIC`). If `AUTOMATIC` is provided, determines the used SYCL implementation based on the value of `-DPLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION` provided during PLSSVM'S build step. | +| `DataParallelKernel` | `AUTOMATIC`, `BASIC`, `WORK_GROUP`, `HIERARCHICAL`, `SCOPED` | The different supported SYCL data parallel kernels (default: `AUTOMATIC`). If `AUTOMATIC` is provided, simply uses `WORK_GROUP`. | If the stdpar backend is available, an additional enumeration is available: @@ -355,7 +356,7 @@ If the Kokos backend is available, an additional enumeration is available: The following tables list all PLSSVM classes exposed on the Python side: -#### `plssvm.Parameter` +#### plssvm.Parameter The parameter class encapsulates all necessary hyperparameters needed to fit an SVM. @@ -378,7 +379,7 @@ The parameter class encapsulates all necessary hyperparameters needed to fit an | `param1 != param2` | Check whether two parameter objects aren't identical. | | `print(param)` | Overload to print a `plssvm.Parameter` object displaying the used hyper-parameters. | -#### `plssvm.ClassificationDataSet` and `plssvm.RegressionDataSet` +#### plssvm.ClassificationDataSet and plssvm.RegressionDataSet A class encapsulating a used classification or regression data set. 
The label types are either determined by the provided labels or if no labels are given or the data is read through a @@ -412,7 +413,7 @@ The following methods are **only** available for a `plssvm.ClassificationDataSet | `num_classes()` | Return the number of classes. **Note**: `0` if no labels are present. | | `classes()` | Return the different classes, if labels are present. | -#### `plssvm.MinMaxScaler` +#### plssvm.MinMaxScaler A class encapsulating and performing the scaling of a data set to the provided `[lower, upper]` range. @@ -430,7 +431,7 @@ A class encapsulating and performing the scaling of a data set to the provided ` | `communicator()` | Return the used MPI communicator. | | `print(scaling)` | Overload to print a data set scaling object object displaying the scaling interval and number of scaling factors. | -##### `plssvm.MinMaxScalerFactors` +##### plssvm.MinMaxScalerFactors A class encapsulating a scaling factor for a specific feature in a data set obtained by `plssvm.MinMaxScaler`. **Note**: it shouldn't be necessary to directly use `plssvm.MinMaxScalerFactors` in user code. @@ -449,7 +450,7 @@ A class encapsulating a scaling factor for a specific feature in a data set obta |-------------------------|----------------------------------------------------------------------------------------------------------------| | `print(scaling_factor)` | Overload to print a data set scaling object object displaying the feature's index, minimum, and maximum value. | -#### `plssvm.CSVC` and `plssvm.CSVR` +#### plssvm.CSVC and plssvm.CSVR The main class responsible for fitting an SVM model and later predicting or scoring new data sets. 
It uses either the provided backend type or the default determined one to create a PLSSVM C-SVM of the correct backend @@ -469,7 +470,7 @@ The following constructors and methods are available for both classification `CS **Note**: if the backend type is `plssvm.BackendType.SYCL` two additional named parameters can be provided: `sycl_implementation_type` to choose between DPC++ and AdaptiveCpp as SYCL implementations -and `sycl_kernel_invocation_type` to choose between the two different SYCL kernel invocation types. +and `sycl_data_parallel_kernel` to choose between the different SYCL data parallel kernels. **Note**: if the backend type is `plssvm.BackendType.HPX` or `plssvm.BackendType.Kokkos` special initialization and finalization functions must be called. @@ -490,7 +491,7 @@ However, this is **automatically** handled by our Python bindings on the module **Note**: the `classification` named parameter is not allowed for the `CSVR`! -#### The backend `C-SVC`s and `C-SVR`s +#### The backend C-SVCs and C-SVRs These classes represent the backend specific C-SVMs: - OpenMP: `plssvm.openmp.CSVC` and `plssvm.openmp.CSVR` @@ -519,12 +520,12 @@ The following constructors and methods are available for both classification `CS | `CSVC(target, *, kernel_type=plssvm.KernelFunctionType.RBF, degree=3, gamma=plssvm.GammaCoefficientType.AUTO, coef0=0.0, cost=1.0, comm=*used MPI communicator*)` | Create a new C-SVM with the provided parameters and named arguments. | In case of the SYCL C-SVMs (`plssvm.sycl.CSVM`, `plssvm.dpcpp.CSVM`, and `plssvm.adaptivecpp.CSVM`; the same for the -`CSVR`s), additionally, all constructors also accept the SYCL specific `sycl_kernel_invocation_type` keyword parameter. +`CSVR`s), additionally, all constructors also accept the SYCL specific `sycl_data_parallel_kernel` keyword parameter. 
Also, the following method is additional available for the backend specific C-SVM: -| methods | description | -|--------------------------------|-----------------------------------------| -| `get_kernel_invocation_type()` | Return the SYCL kernel invocation type. | +| methods | description | +|------------------------------|--------------------------------------------| +| `get_data_parallel_kernel()` | Return the used SYCL data parallel kernel. | In case of the stdpar C-SVM (`plssvm.stdpar.CSVC` and `plssvm.stdpar.CSVR`) the following method is additional available for the backend specific C-SVM. @@ -540,7 +541,7 @@ Also, the following method is additional available for the backend specific C-SV |-------------------------|-----------------------------------------| | `get_execution_space()` | Return the used Kokkos execution space. | -#### `plssvm.ClassificationModel` and `plssvm::RegressionModel` +#### plssvm.ClassificationModel and plssvm::RegressionModel A class encapsulating a model learned during a call to `plssvm.CSVC.fit()` or `plssvm::CSVR.fit()`. @@ -571,7 +572,7 @@ The following methods are **only** available for a `plssvm.ClassificationModel`: | `classes()` | Return the different classes. | | `get_classification_type()` | Return the used classification strategy. | -#### `plssvm.performance_tracking` +#### plssvm.performance_tracking A submodule used to track various performance statistics like runtimes, but also the used setup and hyperparameters. The tracked metrics can be saved to a YAML file for later post-processing. @@ -592,7 +593,7 @@ The tracked metrics can be saved to a YAML file for later post-processing. | `get_events()` | Return all previously recorded events. | | `clear_tracking_entries()` | Remove all currently tracked entries from the performance tracker. | -#### `plssvm.performance_tracking.Event`, `plssvm.performance_tracking.Events` +#### plssvm.performance_tracking.Event, plssvm.performance_tracking.Events Two rather similar classes. 
**Note**: both classes are only available if PLSSVM was built with `-DPLSSVM_ENABLE_PERFORMANCE_TRACKING=ON`! diff --git a/bindings/Python/backend_types.cpp b/bindings/Python/backend_types.cpp index 7696ddaa8..6059c15be 100644 --- a/bindings/Python/backend_types.cpp +++ b/bindings/Python/backend_types.cpp @@ -9,18 +9,19 @@ #include "plssvm/backend_types.hpp" // plssvm::backend_type, plssvm::list_available_backends, plssvm::determine_default_backend -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion +#include "plssvm/target_platforms.hpp" // plssvm::list_available_target_platforms -#include "pybind11/pybind11.h" // py::module_, py::enum_ -#include "pybind11/stl.h" // support for STL types: std::vector +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion -#include // std::vector +#include "pybind11/pybind11.h" // py::module_, py::enum_ +#include "pybind11/stl.h" // NOLINT: support for STL types: std::vector namespace py = pybind11; void init_backend_types(py::module_ &m) { // bind enum class - py::enum_ py_enum(m, "BackendType", "Enum class for all possible backend types, all different SYCL implementations have the same backend type \"sycl\"."); + py::enum_ py_enum(m, "BackendType", "enum.Enum", "Enum class for all possible backend types, all different SYCL implementations have the same backend type \"sycl\"."); py_enum .value("AUTOMATIC", plssvm::backend_type::automatic, "the default backend; depends on the specified target platform") .value("OPENMP", plssvm::backend_type::openmp, "OpenMP to target CPUs only (currently no OpenMP target offloading support)") diff --git a/bindings/Python/backends/adaptivecpp_csvm.cpp b/bindings/Python/backends/adaptivecpp_csvm.cpp index bf43d85f1..1d5e0e6dd 100644 --- 
a/bindings/Python/backends/adaptivecpp_csvm.cpp +++ b/bindings/Python/backends/adaptivecpp_csvm.cpp @@ -6,21 +6,22 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "plssvm/backend_types.hpp" // plssvm::adaptivecpp::backend_csvm_type_t -#include "plssvm/backends/SYCL/AdaptiveCpp/csvm.hpp" // plssvm::adaptivecpp::csvm -#include "plssvm/backends/SYCL/exceptions.hpp" // plssvm::adaptivecpp::backend_exception -#include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type -#include "plssvm/constants.hpp" // plssvm::real_type -#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception -#include "plssvm/gamma.hpp" // plssvm::gamma -#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type -#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator -#include "plssvm/parameter.hpp" // plssvm::parameter -#include "plssvm/svm/csvc.hpp" // plssvm::csvc -#include "plssvm/svm/csvm.hpp" // plssvm::csvm -#include "plssvm/svm/csvr.hpp" // plssvm::csvr -#include "plssvm/target_platforms.hpp" // plssvm::target_platform - +#include "plssvm/backend_types.hpp" // plssvm::adaptivecpp::backend_csvm_type_t +#include "plssvm/backends/SYCL/AdaptiveCpp/csvm.hpp" // plssvm::adaptivecpp::csvm +#include "plssvm/backends/SYCL/data_parallel_kernels.hpp" // plssvm::sycl::data_parallel_kernel +#include "plssvm/backends/SYCL/exceptions.hpp" // plssvm::adaptivecpp::backend_exception +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/gamma.hpp" // plssvm::gamma +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/svm/csvc.hpp" // plssvm::csvc +#include "plssvm/svm/csvm.hpp" // plssvm::csvm +#include "plssvm/svm/csvr.hpp" // plssvm::csvr 
+#include "plssvm/target_platforms.hpp" // plssvm::target_platform + +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings #include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator #include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_py_exception @@ -49,18 +50,18 @@ void bind_adaptivecpp_csvms(py::module_ &m, const std::string &csvm_name) { const std::string keyword_args_constructor_docstring{ fmt::format("create an AdaptiveCpp SYCL {} with the provided SVM parameter as separate keyword arguments including optional SYCL specific keyword arguments", csvm_name) }; py::class_(m, csvm_name.c_str(), class_docstring.c_str()) - .def(py::init([](const plssvm::target_platform target, const plssvm::parameter params, const plssvm::sycl::kernel_invocation_type invocation, plssvm::mpi::communicator comm) { - return std::make_unique(std::move(comm), target, params, plssvm::sycl_kernel_invocation_type = invocation); + .def(py::init([](const plssvm::target_platform target, const plssvm::parameter params, const plssvm::sycl::data_parallel_kernel data_parallel_kernel_type, plssvm::mpi::communicator comm) { + return std::make_unique(std::move(comm), target, params, plssvm::sycl_data_parallel_kernel = data_parallel_kernel_type); }), params_constructor_docstring.c_str(), py::arg("target") = plssvm::target_platform::automatic, py::kw_only(), py::arg("params") = default_params, - py::arg("sycl_kernel_invocation_type") = plssvm::sycl::kernel_invocation_type::automatic, + py::arg("sycl_data_parallel_kernel") = plssvm::sycl::data_parallel_kernel::automatic, py::arg("comm") = plssvm::mpi::communicator{}) - .def(py::init([](const plssvm::target_platform target, const plssvm::kernel_function_type kernel_type, const int degree, const plssvm::gamma_type gamma, const plssvm::real_type coef0, const plssvm::real_type cost, const 
plssvm::sycl::kernel_invocation_type invocation, plssvm::mpi::communicator comm) { + .def(py::init([](const plssvm::target_platform target, const plssvm::kernel_function_type kernel_type, const int degree, const plssvm::gamma_type gamma, const plssvm::real_type coef0, const plssvm::real_type cost, const plssvm::sycl::data_parallel_kernel data_parallel_kernel_type, plssvm::mpi::communicator comm) { const plssvm::parameter params{ kernel_type, degree, gamma, coef0, cost }; - return std::make_unique(std::move(comm), target, params, plssvm::sycl_kernel_invocation_type = invocation); + return std::make_unique(std::move(comm), target, params, plssvm::sycl_data_parallel_kernel = data_parallel_kernel_type); }), keyword_args_constructor_docstring.c_str(), py::arg("target") = plssvm::target_platform::automatic, @@ -70,11 +71,11 @@ void bind_adaptivecpp_csvms(py::module_ &m, const std::string &csvm_name) { py::arg("gamma") = default_params.gamma, py::arg("coef0") = default_params.coef0, py::arg("cost") = default_params.cost, - py::arg("sycl_kernel_invocation_type") = plssvm::sycl::kernel_invocation_type::automatic, + py::arg("sycl_data_parallel_kernel") = plssvm::sycl::data_parallel_kernel::automatic, py::arg("comm") = plssvm::mpi::communicator{}) - .def("get_kernel_invocation_type", &plssvm::adaptivecpp::csvm::get_kernel_invocation_type, "get the kernel invocation type used in this SYCL C-SVM") + .def("get_data_parallel_kernel", &plssvm::adaptivecpp::csvm::get_data_parallel_kernel, "get the data parallel kernel used in this SYCL C-SVM") .def("__repr__", [csvm_name](const backend_csvm_type &self) { - return fmt::format("", csvm_name, self.num_available_devices(), self.get_kernel_invocation_type()); + return fmt::format("", csvm_name, self.num_available_devices(), self.get_data_parallel_kernel()); }); } diff --git a/bindings/Python/backends/cuda_csvm.cpp b/bindings/Python/backends/cuda_csvm.cpp index f5984065d..b405c6e0d 100644 --- a/bindings/Python/backends/cuda_csvm.cpp +++ 
b/bindings/Python/backends/cuda_csvm.cpp @@ -20,6 +20,7 @@ #include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings #include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator #include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_py_exception diff --git a/bindings/Python/backends/dpcpp_csvm.cpp b/bindings/Python/backends/dpcpp_csvm.cpp index 51dcd7e16..a9daa0de5 100644 --- a/bindings/Python/backends/dpcpp_csvm.cpp +++ b/bindings/Python/backends/dpcpp_csvm.cpp @@ -6,21 +6,22 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "plssvm/backend_types.hpp" // plssvm::dpcpp::backend_csvm_type_t -#include "plssvm/backends/SYCL/DPCPP/csvm.hpp" // plssvm::dpcpp::csvm -#include "plssvm/backends/SYCL/exceptions.hpp" // plssvm::dpcpp::backend_exception -#include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type -#include "plssvm/constants.hpp" // plssvm::real_type -#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception -#include "plssvm/gamma.hpp" // plssvm::gamma -#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type -#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator -#include "plssvm/parameter.hpp" // plssvm::parameter -#include "plssvm/svm/csvc.hpp" // plssvm::csvc -#include "plssvm/svm/csvm.hpp" // plssvm::csvm -#include "plssvm/svm/csvr.hpp" // plssvm::csvr -#include "plssvm/target_platforms.hpp" // plssvm::target_platform - +#include "plssvm/backend_types.hpp" // plssvm::dpcpp::backend_csvm_type_t +#include "plssvm/backends/SYCL/data_parallel_kernels.hpp" // plssvm::sycl::data_parallel_kernel +#include "plssvm/backends/SYCL/DPCPP/csvm.hpp" // plssvm::dpcpp::csvm +#include 
"plssvm/backends/SYCL/exceptions.hpp" // plssvm::dpcpp::backend_exception +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/gamma.hpp" // plssvm::gamma +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/svm/csvc.hpp" // plssvm::csvc +#include "plssvm/svm/csvm.hpp" // plssvm::csvm +#include "plssvm/svm/csvr.hpp" // plssvm::csvr +#include "plssvm/target_platforms.hpp" // plssvm::target_platform + +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings #include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator #include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_py_exception @@ -49,18 +50,18 @@ void bind_dpcpp_csvms(py::module_ &m, const std::string &csvm_name) { const std::string keyword_args_constructor_docstring{ fmt::format("create a DPC++ SYCL {} with the provided SVM parameter as separate keyword arguments including optional SYCL specific keyword arguments", csvm_name) }; py::class_(m, csvm_name.c_str(), class_docstring.c_str()) - .def(py::init([](const plssvm::target_platform target, const plssvm::parameter params, const plssvm::sycl::kernel_invocation_type invocation, plssvm::mpi::communicator comm) { - return std::make_unique(std::move(comm), target, params, plssvm::sycl_kernel_invocation_type = invocation); + .def(py::init([](const plssvm::target_platform target, const plssvm::parameter params, const plssvm::sycl::data_parallel_kernel data_parallel_kernel_type, plssvm::mpi::communicator comm) { + return std::make_unique(std::move(comm), target, params, plssvm::sycl_data_parallel_kernel = data_parallel_kernel_type); }), params_constructor_docstring.c_str(), 
py::arg("target") = plssvm::target_platform::automatic, py::kw_only(), py::arg("params") = default_params, - py::arg("sycl_kernel_invocation_type") = plssvm::sycl::kernel_invocation_type::automatic, + py::arg("sycl_data_parallel_kernel") = plssvm::sycl::data_parallel_kernel::automatic, py::arg("comm") = plssvm::mpi::communicator{}) - .def(py::init([](const plssvm::target_platform target, const plssvm::kernel_function_type kernel_type, const int degree, const plssvm::gamma_type gamma, const plssvm::real_type coef0, const plssvm::real_type cost, const plssvm::sycl::kernel_invocation_type invocation, plssvm::mpi::communicator comm) { + .def(py::init([](const plssvm::target_platform target, const plssvm::kernel_function_type kernel_type, const int degree, const plssvm::gamma_type gamma, const plssvm::real_type coef0, const plssvm::real_type cost, const plssvm::sycl::data_parallel_kernel data_parallel_kernel_type, plssvm::mpi::communicator comm) { const plssvm::parameter params{ kernel_type, degree, gamma, coef0, cost }; - return std::make_unique(std::move(comm), target, params, plssvm::sycl_kernel_invocation_type = invocation); + return std::make_unique(std::move(comm), target, params, plssvm::sycl_data_parallel_kernel = data_parallel_kernel_type); }), keyword_args_constructor_docstring.c_str(), py::arg("target") = plssvm::target_platform::automatic, @@ -70,11 +71,11 @@ void bind_dpcpp_csvms(py::module_ &m, const std::string &csvm_name) { py::arg("gamma") = default_params.gamma, py::arg("coef0") = default_params.coef0, py::arg("cost") = default_params.cost, - py::arg("sycl_kernel_invocation_type") = plssvm::sycl::kernel_invocation_type::automatic, + py::arg("sycl_data_parallel_kernel") = plssvm::sycl::data_parallel_kernel::automatic, py::arg("comm") = plssvm::mpi::communicator{}) - .def("get_kernel_invocation_type", &plssvm::dpcpp::csvm::get_kernel_invocation_type, "get the kernel invocation type used in this SYCL C-SVM") + .def("get_data_parallel_kernel", 
&plssvm::dpcpp::csvm::get_data_parallel_kernel, "get the data parallel kernel used in this SYCL C-SVM") .def("__repr__", [csvm_name](const backend_csvm_type &self) { - return fmt::format("", csvm_name, self.num_available_devices(), self.get_kernel_invocation_type()); + return fmt::format("", csvm_name, self.num_available_devices(), self.get_data_parallel_kernel()); }); } diff --git a/bindings/Python/backends/hip_csvm.cpp b/bindings/Python/backends/hip_csvm.cpp index b1f6ca7f1..76230af67 100644 --- a/bindings/Python/backends/hip_csvm.cpp +++ b/bindings/Python/backends/hip_csvm.cpp @@ -20,6 +20,7 @@ #include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings #include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator #include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_py_exception diff --git a/bindings/Python/backends/hpx_csvm.cpp b/bindings/Python/backends/hpx_csvm.cpp index 91d56d58c..3210dadcb 100644 --- a/bindings/Python/backends/hpx_csvm.cpp +++ b/bindings/Python/backends/hpx_csvm.cpp @@ -21,6 +21,7 @@ #include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings #include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator #include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_py_exception diff --git a/bindings/Python/backends/kokkos_csvm.cpp b/bindings/Python/backends/kokkos_csvm.cpp index d55dddf2a..e96995d79 100644 --- a/bindings/Python/backends/kokkos_csvm.cpp +++ b/bindings/Python/backends/kokkos_csvm.cpp @@ -6,26 +6,27 @@ * See the LICENSE.md 
file in the project root for full license information. */ -#include "plssvm/backend_types.hpp" // plssvm::kokkos::backend_csvm_type_t -#include "plssvm/backends/Kokkos/csvm.hpp" // plssvm::kokkos::csvm -#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception -#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space -#include "plssvm/constants.hpp" // plssvm::real_type -#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception -#include "plssvm/gamma.hpp" // plssvm::gamma -#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type -#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator -#include "plssvm/parameter.hpp" // plssvm::parameter -#include "plssvm/svm/csvc.hpp" // plssvm::csvc -#include "plssvm/svm/csvm.hpp" // plssvm::csvm -#include "plssvm/svm/csvr.hpp" // plssvm::csvr -#include "plssvm/target_platforms.hpp" // plssvm::target_platform - +#include "plssvm/backend_types.hpp" // plssvm::kokkos::backend_csvm_type_t +#include "plssvm/backends/Kokkos/csvm.hpp" // plssvm::kokkos::csvm +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/backends/Kokkos/execution_spaces.hpp" // plssvm::kokkos::execution_space +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/gamma.hpp" // plssvm::gamma +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/svm/csvc.hpp" // plssvm::csvc +#include "plssvm/svm/csvm.hpp" // plssvm::csvm +#include "plssvm/svm/csvr.hpp" // plssvm::csvr +#include "plssvm/target_platforms.hpp" // plssvm::target_platform + +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings #include 
"bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator #include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{register_py_exception, register_implicit_str_enum_conversion} #include "fmt/format.h" // fmt::format -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::exception, py::module_local +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::exception, py::module_local, py::enum_ #include "pybind11/stl.h" // support for STL types: std::variant #include // std::make_unique @@ -85,7 +86,7 @@ void init_kokkos_csvm(py::module_ &m, const py::exception &ba py::module_ kokkos_module = m.def_submodule("kokkos", "a module containing all Kokkos backend specific functionality"); // bind the enum class - py::enum_ py_enum(kokkos_module, "ExecutionSpace", "Enum class for all supported Kokkos execution spaces in PLSSVM."); + py::enum_ py_enum(kokkos_module, "ExecutionSpace", "enum.Enum", "Enum class for all supported Kokkos execution spaces in PLSSVM."); py_enum .value("AUTOMATIC", plssvm::kokkos::execution_space::automatic, "automatically determine the used Kokkos execution space; note: this does not necessarily correspond to Kokkos::DefaultExecutionSpace!") .value("CUDA", plssvm::kokkos::execution_space::cuda, "execution space representing execution on a CUDA device") diff --git a/bindings/Python/backends/opencl_csvm.cpp b/bindings/Python/backends/opencl_csvm.cpp index 9c3a2f3d0..cde51dad4 100644 --- a/bindings/Python/backends/opencl_csvm.cpp +++ b/bindings/Python/backends/opencl_csvm.cpp @@ -20,6 +20,7 @@ #include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings #include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a 
plssvm::mpi::communicator #include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_py_exception diff --git a/bindings/Python/backends/openmp_csvm.cpp b/bindings/Python/backends/openmp_csvm.cpp index 095659542..3e3a67699 100644 --- a/bindings/Python/backends/openmp_csvm.cpp +++ b/bindings/Python/backends/openmp_csvm.cpp @@ -20,12 +20,13 @@ #include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // NOLINT: a custom Pybind11 type caster for a plssvm::mpi::communicator #include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_py_exception #include "fmt/format.h" // fmt::format #include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::exception, py::module_local -#include "pybind11/stl.h" // support for STL types: std::variant +#include "pybind11/stl.h" // NOLINT: support for STL types: std::variant #include // std::make_unique #include // std::string diff --git a/bindings/Python/backends/stdpar_csvm.cpp b/bindings/Python/backends/stdpar_csvm.cpp index df32403e3..84c52c0f3 100644 --- a/bindings/Python/backends/stdpar_csvm.cpp +++ b/bindings/Python/backends/stdpar_csvm.cpp @@ -21,11 +21,12 @@ #include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings #include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator #include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{register_py_exception, 
register_implicit_str_enum_conversion} #include "fmt/format.h" // fmt::format -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::exception, py::module_local +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::exception, py::module_local, py::enum_ #include "pybind11/stl.h" // support for STL types: std::variant #include // std::make_unique @@ -48,7 +49,7 @@ void bind_stdpar_csvms(py::module_ &m, const std::string &csvm_name) { const std::string params_constructor_docstring{ fmt::format("create an stdpar {} with the provided SVM parameter encapsulated in a plssvm.Parameter", csvm_name) }; const std::string keyword_args_constructor_docstring{ fmt::format("create an stdpar {} with the provided SVM parameter as separate keyword arguments", csvm_name) }; - py::class_(m, csvm_name.c_str()) + py::class_(m, csvm_name.c_str(), class_docstring.c_str()) .def(py::init([](const plssvm::target_platform target, const plssvm::parameter params, plssvm::mpi::communicator comm) { return std::make_unique(std::move(comm), target, params); }), @@ -83,7 +84,7 @@ void init_stdpar_csvm(py::module_ &m, const py::exception &ba py::module_ stdpar_module = m.def_submodule("stdpar", "a module containing all stdpar backend specific functionality"); // bind the enum class - py::enum_ py_enum(stdpar_module, "ImplementationType", "Enum class for all supported stdpar implementations in PLSSVM."); + py::enum_ py_enum(stdpar_module, "ImplementationType", "enum.Enum", "Enum class for all supported stdpar implementations in PLSSVM."); py_enum .value("NVHPC", plssvm::stdpar::implementation_type::nvhpc, "use NVIDIA's HPC SDK (NVHPC) compiler nvc++") .value("ROC_STDPAR", plssvm::stdpar::implementation_type::roc_stdpar, "use AMD's roc-stdpar compiler (patched LLVM)") diff --git a/bindings/Python/backends/sycl.cpp b/bindings/Python/backends/sycl.cpp index 98c27214b..086481e9d 100644 --- a/bindings/Python/backends/sycl.cpp +++ 
b/bindings/Python/backends/sycl.cpp @@ -6,14 +6,15 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "plssvm/backends/SYCL/exceptions.hpp" // plssvm::sycl::backend_exception -#include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::{implementation_type, list_available_sycl_implementations} -#include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type -#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/backends/SYCL/data_parallel_kernels.hpp" // plssvm::sycl::data_parallel_kernel +#include "plssvm/backends/SYCL/exceptions.hpp" // plssvm::sycl::backend_exception +#include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::{implementation_type, list_available_sycl_implementations} +#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{register_py_exception, register_implicit_str_enum_conversion} +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{register_py_exception, register_implicit_str_enum_conversion} -#include "pybind11/pybind11.h" // py::module_, py::enum_, py::exception +#include "pybind11/pybind11.h" // py::module_, py::exception, py::enum_ #include "pybind11/stl.h" // support for STL types: std:vector #include // std::vector @@ -23,9 +24,6 @@ namespace py = pybind11; -py::module_ init_adaptivecpp_csvm(py::module_ &, const py::exception &); -py::module_ init_dpcpp_csvm(py::module_ &, const py::exception &); - void init_sycl(py::module_ &m, const py::exception &base_exception) { // use its own submodule for the SYCL specific bindings py::module_ sycl_module = m.def_submodule("sycl", "a module containing all SYCL backend specific functionality"); @@ -34,7 +32,7 @@ void init_sycl(py::module_ &m, 
const py::exception &base_exce plssvm::bindings::python::util::register_py_exception(sycl_module, "BackendError", base_exception); // bind the two enum classes - py::enum_ py_enum_impl(sycl_module, "ImplementationType", "Enum class for all supported SYCL implementation in PLSSVM."); + py::enum_ py_enum_impl(sycl_module, "ImplementationType", "enum.Enum", "Enum class for all supported SYCL implementation in PLSSVM."); py_enum_impl .value("AUTOMATIC", plssvm::sycl::implementation_type::automatic, "use the available SYCL implementation; if more than one implementation is available, the macro PLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION must be defined during the CMake configuration") .value("DPCPP", plssvm::sycl::implementation_type::dpcpp, "use DPC++ as SYCL implementation") @@ -45,16 +43,16 @@ void init_sycl(py::module_ &m, const py::exception &base_exce sycl_module.def("list_available_sycl_implementations", &plssvm::sycl::list_available_sycl_implementations, "list all available SYCL implementations"); - py::enum_ py_enum_invocation(sycl_module, "KernelInvocationType", "Enum class for all possible SYCL kernel invocation types supported in PLSSVM."); - py_enum_invocation - .value("AUTOMATIC", plssvm::sycl::kernel_invocation_type::automatic, "use the best kernel invocation type for the current SYCL implementation and target hardware platform") - .value("BASIC", plssvm::sycl::kernel_invocation_type::basic, "use the basic data parallel kernel invocation type") - .value("WORK_GROUP", plssvm::sycl::kernel_invocation_type::work_group, "use the work-group data parallel kernel invocation type") - .value("HIERARCHICAL", plssvm::sycl::kernel_invocation_type::hierarchical, "use the hierarchical data parallel kernel invocation type") - .value("SCOPED", plssvm::sycl::kernel_invocation_type::scoped, "use the AdaptiveCpp specific scoped parallelism kernel invocation type"); + py::enum_ py_enum_data_parallel_kernel(sycl_module, "DataParallelKernel", "enum.Enum", "Enum class for all 
possible SYCL data parallel kernels supported in PLSSVM."); + py_enum_data_parallel_kernel + .value("AUTOMATIC", plssvm::sycl::data_parallel_kernel::automatic, "use the best data parallel kernel for the current SYCL implementation and target hardware platform") + .value("BASIC", plssvm::sycl::data_parallel_kernel::basic, "use the basic data parallel kernel") + .value("WORK_GROUP", plssvm::sycl::data_parallel_kernel::work_group, "use the work-group data parallel kernel") + .value("HIERARCHICAL", plssvm::sycl::data_parallel_kernel::hierarchical, "use the hierarchical data parallel kernel") + .value("SCOPED", plssvm::sycl::data_parallel_kernel::scoped, "use the AdaptiveCpp specific scoped parallelism kernel"); // enable implicit conversion from string to enum - plssvm::bindings::python::util::register_implicit_str_enum_conversion(py_enum_invocation); + plssvm::bindings::python::util::register_implicit_str_enum_conversion(py_enum_data_parallel_kernel); // initialize SYCL binding classes #if defined(PLSSVM_SYCL_BACKEND_HAS_ADAPTIVECPP) diff --git a/bindings/Python/bindings_fwd.hpp b/bindings/Python/bindings_fwd.hpp new file mode 100644 index 000000000..ce12e8aa1 --- /dev/null +++ b/bindings/Python/bindings_fwd.hpp @@ -0,0 +1,61 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Header forward declaring all helper functions used to create the Python bindings. 
+ */ + +#ifndef PLSSVM_BINDINGS_PYTHON_BINDINGS_FWD_HPP_ +#define PLSSVM_BINDINGS_PYTHON_BINDINGS_FWD_HPP_ +#pragma once + +#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception + +#include "pybind11/pybind11.h" // py::module_, py::exception + +namespace py = pybind11; + +// forward declare binding functions +void init_verbosity_levels(py::module_ &m); +void init_performance_tracker(py::module_ &m); +void init_events(py::module_ &m); +void init_target_platforms(py::module_ &m); +void init_solver_types(py::module_ &m); +void init_svm_types(py::module_ &m); +void init_backend_types(py::module_ &m); +void init_gamma(py::module_ &m); +void init_classification_types(py::module_ &m); +void init_file_format_types(py::module_ &m); +void init_kernel_function_types(py::module_ &m); +void init_parameter(py::module_ &m); +void init_kernel_functions(py::module_ &m); +void init_classification_model(py::module_ &m); +void init_regression_model(py::module_ &m); +void init_min_max_scaler(py::module_ &m); +void init_classification_data_set(py::module_ &m); +void init_regression_data_set(py::module_ &m); +void init_exceptions(py::module_ &m, const py::exception &base_exception); +void init_regression_report(py::module_ &m); +void init_csvm(py::module_ &m); +void init_csvc(py::module_ &m); +void init_csvr(py::module_ &m); +void init_openmp_csvm(py::module_ &m, const py::exception &base_exception); +void init_hpx_csvm(py::module_ &m, const py::exception &base_exception); +void init_stdpar_csvm(py::module_ &m, const py::exception &base_exception); +void init_cuda_csvm(py::module_ &m, const py::exception &base_exception); +void init_hip_csvm(py::module_ &m, const py::exception &base_exception); +void init_opencl_csvm(py::module_ &m, const py::exception &base_exception); +void init_sycl(py::module_ &m, const py::exception &base_exception); +py::module_ init_adaptivecpp_csvm(py::module_ &m, const py::exception &base_exception); +py::module_ init_dpcpp_csvm(py::module_ &m, const
py::exception &base_exception); +void init_kokkos_csvm(py::module_ &m, const py::exception &base_exception); + +void init_sklearn_tags(py::module_ &m); +void init_sklearn_svc(py::module_ &m); +void init_sklearn_svr(py::module_ &m); + +#endif // PLSSVM_BINDINGS_PYTHON_BINDINGS_FWD_HPP_ diff --git a/bindings/Python/classification_types.cpp b/bindings/Python/classification_types.cpp index 7d145ff84..d96a5b409 100644 --- a/bindings/Python/classification_types.cpp +++ b/bindings/Python/classification_types.cpp @@ -8,17 +8,17 @@ #include "plssvm/classification_types.hpp" // plssvm::classification_type, plssvm::classification_type_to_full_string, plssvm::calculate_number_of_classifiers -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion -#include "pybind11/pybind11.h" // py::module_, py::enum_, py::arg - -#include // std::string +#include "pybind11/cast.h" // py::arg +#include "pybind11/pybind11.h" // py::module_, py::arg, py::enum_ namespace py = pybind11; void init_classification_types(py::module_ &m) { // bind enum class - py::enum_ py_enum(m, "ClassificationType", "Enum class for all implemented multiclass classification strategies."); + py::enum_ py_enum(m, "ClassificationType", "enum.Enum", "Enum class for all implemented multiclass classification strategies."); py_enum .value("OAA", plssvm::classification_type::oaa, "use the one vs. all classification strategy (default)") .value("OAO", plssvm::classification_type::oao, "use the one vs. 
one classification strategy"); diff --git a/bindings/Python/data_set/classification_data_set.cpp b/bindings/Python/data_set/classification_data_set.cpp index 1ffacb2e4..0f560b4b3 100644 --- a/bindings/Python/data_set/classification_data_set.cpp +++ b/bindings/Python/data_set/classification_data_set.cpp @@ -15,18 +15,19 @@ #include "plssvm/matrix.hpp" // plssvm::soa_matrix #include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator -#include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::classification_data_set_wrapper -#include "bindings/Python/type_caster/label_vector_wrapper_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::label_vector_wrapper -#include "bindings/Python/type_caster/matrix_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::matrix -#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{create_instance, python_type_name_mapping, vector_to_pyarray} +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings +#include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::classification_data_set_wrapper +#include "bindings/Python/type_caster/label_vector_wrapper_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::label_vector_wrapper +#include "bindings/Python/type_caster/matrix_type_caster.hpp" // NOLINT: a custom Pybind11 type caster for a plssvm::matrix +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // NOLINT: a custom Pybind11 type caster for a plssvm::mpi::communicator +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{create_instance, python_type_name_mapping, vector_to_pyarray} #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join 
-#include "pybind11/numpy.h" // py::array_t, py::array -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::kw_only, py::attribute_error +#include "pybind11/cast.h" // py::arg +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::kw_only, py::attribute_error, py::return_value_policy #include "pybind11/pytypes.h" // py::type -#include "pybind11/stl.h" // support for STL types +#include "pybind11/stl.h" // NOLINT: support for STL types #include // std::make_unique #include // std::optional, std::nullopt @@ -40,20 +41,17 @@ void init_classification_data_set(py::module_ &m) { using plssvm::bindings::python::util::classification_data_set_wrapper; py::class_(m, "ClassificationDataSet", "Encapsulate all necessary data that is needed for training or predicting using an C-SVC.") - .def(py::init([](const std::string &filename, const std::optional type, const plssvm::file_format_type format, const std::optional scaler, plssvm::mpi::communicator comm) { + .def(py::init([](const std::string &filename, const std::optional &type, const plssvm::file_format_type format, const std::optional &scaler, plssvm::mpi::communicator comm) { if (type.has_value()) { if (scaler.has_value()) { return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), filename, format, scaler.value())); - } else { - return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), filename, format)); - } - } else { - if (scaler.has_value()) { - return std::make_unique(plssvm::classification_data_set{ std::move(comm), filename, format, scaler.value() }); - } else { - return std::make_unique(plssvm::classification_data_set{ std::move(comm), filename, format }); } + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), filename, format)); + } + if (scaler.has_value()) { + return std::make_unique(plssvm::classification_data_set{ 
std::move(comm), filename, format, scaler.value() }); } + return std::make_unique(plssvm::classification_data_set{ std::move(comm), filename, format }); }), "create a new data set from the provided file and additional optional parameters like the used label type", py::arg("filename"), @@ -62,20 +60,17 @@ void init_classification_data_set(py::module_ &m) { py::arg("format") = plssvm::file_format_type::libsvm, py::arg("scaler") = std::nullopt, py::arg("comm") = plssvm::mpi::communicator{}) - .def(py::init([](plssvm::soa_matrix data, const std::optional type, const std::optional scaler, plssvm::mpi::communicator comm) { + .def(py::init([](plssvm::soa_matrix data, const std::optional &type, const std::optional &scaler, plssvm::mpi::communicator comm) { if (type.has_value()) { if (scaler.has_value()) { return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), std::move(data), scaler.value())); - } else { - return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), std::move(data))); - } - } else { - if (scaler.has_value()) { - return std::make_unique(plssvm::classification_data_set{ std::move(comm), std::move(data), scaler.value() }); - } else { - return std::make_unique(plssvm::classification_data_set{ std::move(comm), std::move(data) }); } + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), std::move(data))); + } + if (scaler.has_value()) { + return std::make_unique(plssvm::classification_data_set{ std::move(comm), std::move(data), scaler.value() }); } + return std::make_unique(plssvm::classification_data_set{ std::move(comm), std::move(data) }); }), "create a new data set from the provided data and additional optional parameters like the used label type", py::arg("X"), @@ -87,10 +82,9 @@ void init_classification_data_set(py::module_ &m) { return std::visit([&](auto &&labels_vector) { using label_type = typename 
plssvm::detail::remove_cvref_t::value_type; if (scaler.has_value()) { - return std::make_unique(plssvm::classification_data_set(std::move(comm), std::move(data), std::move(labels_vector), scaler.value())); - } else { - return std::make_unique(plssvm::classification_data_set(std::move(comm), std::move(data), std::move(labels_vector))); + return std::make_unique(plssvm::classification_data_set(std::move(comm), std::move(data), std::forward(labels_vector), scaler.value())); } + return std::make_unique(plssvm::classification_data_set(std::move(comm), std::move(data), std::forward(labels_vector))); }, labels.labels); }), @@ -108,9 +102,8 @@ void init_classification_data_set(py::module_ &m) { return std::visit([](auto &&data) { if (!data.has_labels()) { throw py::attribute_error{ "'ClassificationDataSet' object has no function 'labels'. Maybe this ClassificationDataSet was created without labels?" }; - } else { - return plssvm::bindings::python::util::vector_to_pyarray(data.labels()->get()); } + return plssvm::bindings::python::util::vector_to_pyarray(data.labels()->get()); }, self.data_set); }, "the labels") // clang-format on .def("num_data_points", [](const classification_data_set_wrapper &self) { return std::visit([](auto &&data) { return data.num_data_points(); }, self.data_set); }, "the number of data points in the data set") @@ -119,18 +112,16 @@ void init_classification_data_set(py::module_ &m) { .def("scaling_factors", [](const classification_data_set_wrapper &self) { return std::visit([](auto &&data) { if (!data.is_scaled()) { throw py::attribute_error{ "'ClassificationDataSet' object has no function 'scaling_factors'. Maybe this ClassificationDataSet has not been scaled?" 
}; - } else { - return data.scaling_factors().value(); - } }, self.data_set); }, py::return_value_policy::reference_internal, "the factors used to scale this data set") + } + return data.scaling_factors().value(); }, self.data_set); }, py::return_value_policy::reference_internal, "the factors used to scale this data set") .def("num_classes", [](const classification_data_set_wrapper &self) { return std::visit([](auto &&data) { return data.num_classes(); }, self.data_set); }, "the number of classes") // clang-format off .def("classes", [](const classification_data_set_wrapper &self) { return std::visit([](auto &&data) { if (!data.has_labels()) { throw py::attribute_error{ "'ClassificationDataSet' object has no function 'classes'. Maybe this ClassificationDataSet was created without labels?" }; - } else { - return plssvm::bindings::python::util::vector_to_pyarray(data.classes().value()); } + return plssvm::bindings::python::util::vector_to_pyarray(data.classes().value()); }, self.data_set); }, "the number of classes") .def("communicator", [](const classification_data_set_wrapper &self) { return std::visit([](auto &&data) { return data.communicator(); }, self.data_set); }, "the associated MPI communicator") .def("__repr__", [](const classification_data_set_wrapper &self) { diff --git a/bindings/Python/data_set/min_max_scaler.cpp b/bindings/Python/data_set/min_max_scaler.cpp index 5779b9e29..3a51725ab 100644 --- a/bindings/Python/data_set/min_max_scaler.cpp +++ b/bindings/Python/data_set/min_max_scaler.cpp @@ -11,14 +11,15 @@ #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator -#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // NOLINT: a custom Pybind11 
type caster for a plssvm::mpi::communicator #include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::vector_to_pyarray #include "fmt/format.h" // fmt::format #include "pybind11/numpy.h" // py::array #include "pybind11/pybind11.h" // PYBIND11_NUMPY_DTYPE, py::module_, py::class_, py::init, py::arg #include "pybind11/pytypes.h" // py::type -#include "pybind11/stl.h" // support for STL types +#include "pybind11/stl.h" // NOLINT: support for STL types #include // std::array #include // std::size_t @@ -61,7 +62,7 @@ void init_min_max_scaler(py::module_ &m) { py::arg("interval"), py::kw_only(), py::arg("comm") = plssvm::mpi::communicator{}) - .def(py::init([](const py::tuple interval, plssvm::mpi::communicator comm) { + .def(py::init([](const py::tuple &interval, plssvm::mpi::communicator comm) { if (interval.size() != 2) { throw py::value_error{ fmt::format("MinMaxScaler can only be created from two interval values (lower, upper), but {} were provided!", interval.size()) }; } @@ -84,9 +85,8 @@ void init_min_max_scaler(py::module_ &m) { const auto scaling_factors = self.scaling_factors(); if (scaling_factors.has_value()) { return plssvm::bindings::python::util::vector_to_pyarray(scaling_factors.value()); - } else { - return std::nullopt; - } }, "the scaling factors for each feature") + } + return std::nullopt; }, "the scaling factors for each feature") .def("communicator", &plssvm::min_max_scaler::communicator, "the associated MPI communicator") .def("__repr__", [](const plssvm::min_max_scaler &self) { std::string optional_repr{}; diff --git a/bindings/Python/data_set/regression_data_set.cpp b/bindings/Python/data_set/regression_data_set.cpp index c0369ee0d..245abb5d5 100644 --- a/bindings/Python/data_set/regression_data_set.cpp +++ b/bindings/Python/data_set/regression_data_set.cpp @@ -15,18 +15,18 @@ #include "plssvm/matrix.hpp" // plssvm::soa_matrix #include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator -#include 
"bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_data_set_wrapper -#include "bindings/Python/type_caster/label_vector_wrapper_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::label_vector_wrapper -#include "bindings/Python/type_caster/matrix_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::matrix -#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{create_instance, python_type_name_mapping, vector_to_pyarray} +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings +#include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_data_set_wrapper +#include "bindings/Python/type_caster/label_vector_wrapper_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::label_vector_wrapper +#include "bindings/Python/type_caster/matrix_type_caster.hpp" // NOLINT: a custom Pybind11 type caster for a plssvm::matrix +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // NOLINT: a custom Pybind11 type caster for a plssvm::mpi::communicator +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{create_instance, python_type_name_mapping, vector_to_pyarray} #include "fmt/format.h" // fmt::format -#include "fmt/ranges.h" // fmt::join -#include "pybind11/numpy.h" // py::array_t, py::array -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::kw_only, py::object, py::attribute_error +#include "pybind11/cast.h" // py::arg +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::kw_only, py::object, py::attribute_error, py::return_value_policy #include "pybind11/pytypes.h" // py::type -#include "pybind11/stl.h" // support for STL types +#include 
"pybind11/stl.h" // NOLINT: support for STL types #include // std::make_unique #include // std::optional, std::nullopt @@ -40,20 +40,17 @@ void init_regression_data_set(py::module_ &m) { using plssvm::bindings::python::util::regression_data_set_wrapper; py::class_(m, "RegressionDataSet", "Encapsulate all necessary data that is needed for training or predicting using an C-SVR.") - .def(py::init([](const std::string &filename, const std::optional type, const plssvm::file_format_type format, const std::optional scaler, plssvm::mpi::communicator comm) { + .def(py::init([](const std::string &filename, const std::optional &type, const plssvm::file_format_type format, const std::optional &scaler, plssvm::mpi::communicator comm) { if (type.has_value()) { if (scaler.has_value()) { return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), filename, format, scaler.value())); - } else { - return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), filename, format)); - } - } else { - if (scaler.has_value()) { - return std::make_unique(plssvm::regression_data_set{ std::move(comm), filename, format, scaler.value() }); - } else { - return std::make_unique(plssvm::regression_data_set{ std::move(comm), filename, format }); } + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), filename, format)); + } + if (scaler.has_value()) { + return std::make_unique(plssvm::regression_data_set{ std::move(comm), filename, format, scaler.value() }); } + return std::make_unique(plssvm::regression_data_set{ std::move(comm), filename, format }); }), "create a new data set from the provided file and additional optional parameters", py::arg("filename"), @@ -62,20 +59,17 @@ void init_regression_data_set(py::module_ &m) { py::arg("format") = plssvm::file_format_type::libsvm, py::arg("scaler") = std::nullopt, py::arg("comm") = plssvm::mpi::communicator{}) - 
.def(py::init([](plssvm::soa_matrix data, const std::optional type, const std::optional scaler, plssvm::mpi::communicator comm) { + .def(py::init([](plssvm::soa_matrix data, const std::optional &type, const std::optional &scaler, plssvm::mpi::communicator comm) { if (type.has_value()) { if (scaler.has_value()) { return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), std::move(data), scaler.value())); - } else { - return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), std::move(data))); - } - } else { - if (scaler.has_value()) { - return std::make_unique(plssvm::regression_data_set{ std::move(comm), std::move(data), scaler.value() }); - } else { - return std::make_unique(plssvm::regression_data_set{ std::move(comm), std::move(data) }); } + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), std::move(data))); + } + if (scaler.has_value()) { + return std::make_unique(plssvm::regression_data_set{ std::move(comm), std::move(data), scaler.value() }); } + return std::make_unique(plssvm::regression_data_set{ std::move(comm), std::move(data) }); }), "create a new data set from the provided file and additional optional parameters", py::arg("X"), @@ -87,10 +81,9 @@ void init_regression_data_set(py::module_ &m) { return std::visit([&](auto &&labels_vector) { using label_type = typename plssvm::detail::remove_cvref_t::value_type; if (scaler.has_value()) { - return std::make_unique(plssvm::regression_data_set(std::move(comm), std::move(data), std::move(labels_vector), scaler.value())); - } else { - return std::make_unique(plssvm::regression_data_set(std::move(comm), std::move(data), std::move(labels_vector))); + return std::make_unique(plssvm::regression_data_set(std::move(comm), std::move(data), std::forward(labels_vector), scaler.value())); } + return std::make_unique(plssvm::regression_data_set(std::move(comm), std::move(data), 
std::forward(labels_vector))); }, labels.labels); }), @@ -108,9 +101,8 @@ void init_regression_data_set(py::module_ &m) { return std::visit([](auto &&data) { if (!data.has_labels()) { throw py::attribute_error{ "'RegressionDataSet' object has no function 'labels'. Maybe this RegressionDataSet was created without labels?" }; - } else { - return plssvm::bindings::python::util::vector_to_pyarray(data.labels()->get()); } + return plssvm::bindings::python::util::vector_to_pyarray(data.labels()->get()); }, self.data_set); }, "the labels") // clang-format on .def("num_data_points", [](const regression_data_set_wrapper &self) { return std::visit([](auto &&data) { return data.num_data_points(); }, self.data_set); }, "the number of data points in the data set") @@ -119,9 +111,8 @@ void init_regression_data_set(py::module_ &m) { .def("scaling_factors", [](const regression_data_set_wrapper &self) { return std::visit([](auto &&data) { if (!data.is_scaled()) { throw py::attribute_error{ "'RegressionDataSet' object has no function 'scaling_factors'. Maybe this RegressionDataSet has not been scaled?" 
}; - } else { - return data.scaling_factors().value(); - } }, self.data_set); }, py::return_value_policy::reference_internal, "the factors used to scale this data set") + } + return data.scaling_factors().value(); }, self.data_set); }, py::return_value_policy::reference_internal, "the factors used to scale this data set") // clang-format off .def("communicator", [](const regression_data_set_wrapper &self) { return std::visit([](auto &&data) { return data.communicator(); }, self.data_set); }, "the associated MPI communicator") .def("__repr__", [](const regression_data_set_wrapper &self) { diff --git a/bindings/Python/data_set/variant_wrapper.hpp b/bindings/Python/data_set/variant_wrapper.hpp index 46d853bf5..f9670427c 100644 --- a/bindings/Python/data_set/variant_wrapper.hpp +++ b/bindings/Python/data_set/variant_wrapper.hpp @@ -9,8 +9,8 @@ * @brief Variant wrapper structs around PLSSVM classification and regression data sets. Used that we don't have to expose templates to the Python bindings. 
*/ -#ifndef PLSSVM_BINDINGS_PYTHON_DATA_SET_WRAPPER_HPP_ -#define PLSSVM_BINDINGS_PYTHON_DATA_SET_WRAPPER_HPP_ +#ifndef PLSSVM_BINDINGS_PYTHON_DATA_SET_VARIANT_WRAPPER_HPP_ +#define PLSSVM_BINDINGS_PYTHON_DATA_SET_VARIANT_WRAPPER_HPP_ #pragma once #include "plssvm/data_set/classification_data_set.hpp" // plssvm::classification_data_set @@ -116,4 +116,4 @@ struct regression_data_set_wrapper { } // namespace plssvm::bindings::python::util -#endif // PLSSVM_BINDINGS_PYTHON_DATA_SET_WRAPPER_HPP_ +#endif // PLSSVM_BINDINGS_PYTHON_DATA_SET_VARIANT_WRAPPER_HPP_ diff --git a/bindings/Python/detail/tracking/events.cpp b/bindings/Python/detail/tracking/events.cpp index d08cfe5de..f2d3d89f5 100644 --- a/bindings/Python/detail/tracking/events.cpp +++ b/bindings/Python/detail/tracking/events.cpp @@ -8,17 +8,20 @@ #include "plssvm/detail/tracking/events.hpp" // plssvm::detail::tracking::events -#include "fmt/chrono.h" // format std::chrono types +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings + +#include "fmt/chrono.h" // NOLINT: format std::chrono types #include "fmt/format.h" // fmt::format -#include "pybind11/chrono.h" // bind std::chrono types -#include "pybind11/pybind11.h" // py::module_ -#include "pybind11/stl.h" // bind STL types +#include "pybind11/cast.h" // py::arg +#include "pybind11/chrono.h" // NOLINT: bind std::chrono types +#include "pybind11/pybind11.h" // py::module_, py::overload_cast +#include "pybind11/stl.h" // NOLINT: bind STL types namespace py = pybind11; void init_events(py::module_ &m) { // use a detail.tracking.PerformanceTracker submodule for the performance tracking bindings - py::module_ tracking_module = m.def_submodule("performance_tracking", "a module containing performance tracking functionality"); + const py::module_ tracking_module = m.def_submodule("performance_tracking", "a module containing performance tracking functionality"); using event_type = 
plssvm::detail::tracking::events::event; diff --git a/bindings/Python/detail/tracking/performance_tracker.cpp b/bindings/Python/detail/tracking/performance_tracker.cpp index dcf0f0616..c3954bf74 100644 --- a/bindings/Python/detail/tracking/performance_tracker.cpp +++ b/bindings/Python/detail/tracking/performance_tracker.cpp @@ -11,9 +11,12 @@ #include "plssvm/detail/tracking/events.hpp" // plssvm::detail::tracking::events #include "plssvm/parameter.hpp" // plssvm::parameter -#include "pybind11/chrono.h" // automatic bindings for std::chrono::milliseconds -#include "pybind11/pybind11.h" // py::module_ -#include "pybind11/stl.h" // automatic bindings for std::optional and std::vector +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings + +#include "pybind11/cast.h" // py::arg +#include "pybind11/chrono.h" // NOLINT: automatic bindings for std::chrono::milliseconds +#include "pybind11/pybind11.h" // py::module_, py::return_value_policy +#include "pybind11/stl.h" // NOLINT: automatic bindings for std::optional and std::vector #include // std::chrono::steady_clock::time_point #include // std::string diff --git a/bindings/Python/exceptions/exceptions.cpp b/bindings/Python/exceptions/exceptions.cpp index 71c1f7a92..084702f09 100644 --- a/bindings/Python/exceptions/exceptions.cpp +++ b/bindings/Python/exceptions/exceptions.cpp @@ -8,7 +8,8 @@ #include "plssvm/exceptions/exceptions.hpp" // PLSSVM specific exceptions -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_py_exception +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_py_exception #include "pybind11/pybind11.h" // py::module_, py::exception diff --git a/bindings/Python/file_format_types.cpp b/bindings/Python/file_format_types.cpp index 71a891da0..e8502cea1 100644 --- 
a/bindings/Python/file_format_types.cpp +++ b/bindings/Python/file_format_types.cpp @@ -8,7 +8,8 @@ #include "plssvm/file_format_types.hpp" // plssvm::file_format_type -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion #include "pybind11/pybind11.h" // py::module_, py::enum_ @@ -16,7 +17,7 @@ namespace py = pybind11; void init_file_format_types(py::module_ &m) { // bind enum class - py::enum_ py_enum(m, "FileFormatType", "Enum class for all supported file types."); + py::enum_ py_enum(m, "FileFormatType", "enum.Enum", "Enum class for all supported file types."); py_enum .value("LIBSVM", plssvm::file_format_type::libsvm, "the LIBSVM file format (default); for the file format specification see: https://www.csie.ntu.edu.tw/~cjlin/libsvm/faq.html") .value("ARFF", plssvm::file_format_type::arff, "the ARFF file format; for the file format specification see: https://www.cs.waikato.ac.nz/~ml/weka/arff.html"); diff --git a/bindings/Python/gamma.cpp b/bindings/Python/gamma.cpp index 207c85745..181803976 100644 --- a/bindings/Python/gamma.cpp +++ b/bindings/Python/gamma.cpp @@ -11,17 +11,19 @@ #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/matrix.hpp" // plssvm::aos_matrix -#include "bindings/Python/type_caster/matrix_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::matrix +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings +#include "bindings/Python/type_caster/matrix_type_caster.hpp" // NOLINT: a custom Pybind11 type caster for a plssvm::matrix #include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion +#include "pybind11/cast.h" // 
py::arg #include "pybind11/pybind11.h" // py::module_, py::enum_ -#include "pybind11/stl.h" // support for STL types: std::variant +#include "pybind11/stl.h" // NOLINT: support for STL types: std::variant namespace py = pybind11; void init_gamma(py::module_ &m) { // bind enum class - py::enum_ py_enum(m, "GammaCoefficientType", "Enum class for all possible gamma coefficient types (can also be a number)."); + py::enum_ py_enum(m, "GammaCoefficientType", "enum.Enum", "Enum class for all possible gamma coefficient types (can also be a number)."); py_enum .value("AUTOMATIC", plssvm::gamma_coefficient_type::automatic, "use a dynamic gamma value of 1 / num_features for the kernel functions") .value("SCALE", plssvm::gamma_coefficient_type::scale, "use a dynamic gamma value of 1 / (num_features * data.var()) for the kernel functions"); diff --git a/bindings/Python/kernel_function_types.cpp b/bindings/Python/kernel_function_types.cpp index ca8c45c51..22d3f4a89 100644 --- a/bindings/Python/kernel_function_types.cpp +++ b/bindings/Python/kernel_function_types.cpp @@ -8,9 +8,11 @@ #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion -#include "pybind11/pybind11.h" // py::module_, py::enum_, py::arg +#include "pybind11/cast.h" // py::arg +#include "pybind11/pybind11.h" // py::module_, py::enum_ namespace py = pybind11; diff --git a/bindings/Python/kernel_functions.cpp b/bindings/Python/kernel_functions.cpp index 84d28430a..a714f3868 100644 --- a/bindings/Python/kernel_functions.cpp +++ b/bindings/Python/kernel_functions.cpp @@ -13,8 +13,11 @@ #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type 
#include "plssvm/parameter.hpp" // plssvm::parameter +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings + +#include "pybind11/cast.h" // py::arg #include "pybind11/pybind11.h" // py::module_, py::arg, py::kw_only -#include "pybind11/stl.h" // support for STL types: std::vector +#include "pybind11/stl.h" // NOLINT: support for STL types: std::vector #include // std::holds_alternative, std::get #include // std::vector @@ -29,11 +32,11 @@ void init_kernel_functions(py::module_ &m) { "polynomial_kernel_function", [](const std::vector &x, const std::vector &y, const int degree, const plssvm::gamma_type gamma, const plssvm::real_type coef0) { if (std::holds_alternative(gamma)) { return plssvm::kernel_function(x, y, degree, std::get(gamma), coef0); - } else if (std::get(gamma) == plssvm::gamma_coefficient_type::automatic) { + } + if (std::get(gamma) == plssvm::gamma_coefficient_type::automatic) { return plssvm::kernel_function(x, y, degree, plssvm::real_type{ 1.0 } / static_cast(x.size()), coef0); - } else { - throw py::value_error{ "Can't use the 'scale' gamma option since the required variance can't be calculated!" }; } + throw py::value_error{ "Can't use the 'scale' gamma option since the required variance can't be calculated!" 
}; }, "apply the polynomial kernel function to two vectors", py::arg("x"), @@ -46,11 +49,11 @@ void init_kernel_functions(py::module_ &m) { "rbf_kernel_function", [](const std::vector &x, const std::vector &y, const plssvm::gamma_type gamma) { if (std::holds_alternative(gamma)) { return plssvm::kernel_function(x, y, std::get(gamma)); - } else if (std::get(gamma) == plssvm::gamma_coefficient_type::automatic) { + } + if (std::get(gamma) == plssvm::gamma_coefficient_type::automatic) { return plssvm::kernel_function(x, y, plssvm::real_type{ 1.0 } / static_cast(x.size())); - } else { - throw py::value_error{ "Can't use the 'scale' gamma option since the required variance can't be calculated!" }; } + throw py::value_error{ "Can't use the 'scale' gamma option since the required variance can't be calculated!" }; }, "apply the radial basis function kernel function to two vectors", py::arg("x"), @@ -61,11 +64,11 @@ void init_kernel_functions(py::module_ &m) { "sigmoid_kernel_function", [](const std::vector &x, const std::vector &y, const plssvm::gamma_type gamma, const plssvm::real_type coef0) { if (std::holds_alternative(gamma)) { return plssvm::kernel_function(x, y, std::get(gamma), coef0); - } else if (std::get(gamma) == plssvm::gamma_coefficient_type::automatic) { + } + if (std::get(gamma) == plssvm::gamma_coefficient_type::automatic) { return plssvm::kernel_function(x, y, plssvm::real_type{ 1.0 } / static_cast(x.size()), coef0); - } else { - throw py::value_error{ "Can't use the 'scale' gamma option since the required variance can't be calculated!" }; } + throw py::value_error{ "Can't use the 'scale' gamma option since the required variance can't be calculated!" 
}; }, "apply the sigmoid kernel function to two vectors", py::arg("x"), @@ -77,11 +80,11 @@ void init_kernel_functions(py::module_ &m) { "laplacian_kernel_function", [](const std::vector &x, const std::vector &y, const plssvm::gamma_type gamma) { if (std::holds_alternative(gamma)) { return plssvm::kernel_function(x, y, std::get(gamma)); - } else if (std::get(gamma) == plssvm::gamma_coefficient_type::automatic) { + } + if (std::get(gamma) == plssvm::gamma_coefficient_type::automatic) { return plssvm::kernel_function(x, y, plssvm::real_type{ 1.0 } / static_cast(x.size())); - } else { - throw py::value_error{ "Can't use the 'scale' gamma option since the required variance can't be calculated!" }; } + throw py::value_error{ "Can't use the 'scale' gamma option since the required variance can't be calculated!" }; }, "apply the laplacian kernel function to two vectors", py::arg("x"), @@ -92,11 +95,11 @@ void init_kernel_functions(py::module_ &m) { "chi_squared_kernel_function", [](const std::vector &x, const std::vector &y, const plssvm::gamma_type gamma) { if (std::holds_alternative(gamma)) { return plssvm::kernel_function(x, y, std::get(gamma)); - } else if (std::get(gamma) == plssvm::gamma_coefficient_type::automatic) { + } + if (std::get(gamma) == plssvm::gamma_coefficient_type::automatic) { return plssvm::kernel_function(x, y, plssvm::real_type{ 1.0 } / static_cast(x.size())); - } else { - throw py::value_error{ "Can't use the 'scale' gamma option since the required variance can't be calculated!" }; } + throw py::value_error{ "Can't use the 'scale' gamma option since the required variance can't be calculated!" 
}; }, "apply the chi-squared kernel function to two vectors", py::arg("x"), @@ -109,17 +112,18 @@ void init_kernel_functions(py::module_ &m) { if (params.kernel_type == plssvm::kernel_function_type::linear) { // gamma doesn't matter in the linear kernel function -> simply call the kernel return plssvm::kernel_function(x, y, params); - } else if (std::holds_alternative(params.gamma)) { + } + if (std::holds_alternative(params.gamma)) { // the gamma value matters, but already is a real_type -> simply call the kernel return plssvm::kernel_function(x, y, params); - } else if (std::get(params.gamma) == plssvm::gamma_coefficient_type::automatic) { + } + if (std::get(params.gamma) == plssvm::gamma_coefficient_type::automatic) { // the gamma value matters and is automatic -> convert it to a real_type params.gamma = plssvm::real_type{ 1.0 } / static_cast(x.size()); return plssvm::kernel_function(x, y, params); - } else { - // the gamma value matters and is scale -> not supported - throw py::value_error{ "Can't use the 'scale' gamma option since the required variance can't be calculated!" }; } + // the gamma value matters and is scale -> not supported + throw py::value_error{ "Can't use the 'scale' gamma option since the required variance can't be calculated!" 
}; }, "apply the kernel function defined in the parameter object to two vectors", py::arg("x"), diff --git a/bindings/Python/main.cpp b/bindings/Python/main.cpp index 777317de7..68a38e2a6 100644 --- a/bindings/Python/main.cpp +++ b/bindings/Python/main.cpp @@ -16,48 +16,14 @@ #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level #include "plssvm/version/version.hpp" // plssvm::version::{version, major, minor, patch} -#include "pybind11/pybind11.h" // PYBIND11_MODULE, py::module_, py::exception, py::register_exception_translator, py::make_tuple +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings + +#include "pybind11/cast.h" // py::make_tuple +#include "pybind11/pybind11.h" // PYBIND11_MODULE, py::module_, py::exception, py::register_exception_translator #include "pybind11/pytypes.h" // py::set_error #include // std::exception_ptr, std::rethrow_exception -namespace py = pybind11; - -// forward declare binding functions -void init_verbosity_levels(py::module_ &); -void init_performance_tracker(py::module_ &); -void init_events(py::module_ &); -void init_target_platforms(py::module_ &); -void init_solver_types(py::module_ &); -void init_svm_types(py::module_ &); -void init_backend_types(py::module_ &); -void init_gamma(py::module_ &); -void init_classification_types(py::module_ &); -void init_file_format_types(py::module_ &); -void init_kernel_function_types(py::module_ &); -void init_parameter(py::module_ &); -void init_kernel_functions(py::module_ &); -void init_classification_model(py::module_ &); -void init_regression_model(py::module_ &); -void init_min_max_scaler(py::module_ &); -void init_classification_data_set(py::module_ &); -void init_regression_data_set(py::module_ &); -void init_exceptions(py::module_ &, const py::exception &); -void init_regression_report(py::module_ &); -void init_csvm(py::module_ &); -void init_csvc(py::module_ &); -void init_csvr(py::module_ &); -void 
init_openmp_csvm(py::module_ &, const py::exception &); -void init_hpx_csvm(py::module_ &, const py::exception &); -void init_stdpar_csvm(py::module_ &, const py::exception &); -void init_cuda_csvm(py::module_ &, const py::exception &); -void init_hip_csvm(py::module_ &, const py::exception &); -void init_opencl_csvm(py::module_ &, const py::exception &); -void init_sycl(py::module_ &, const py::exception &); -void init_kokkos_csvm(py::module_ &, const py::exception &); -void init_sklearn_svc(py::module_ &); -void init_sklearn_svr(py::module_ &); - PYBIND11_MODULE(plssvm, m) { m.doc() = "PLSSVM - Parallel Least Squares Support Vector Machine"; m.attr("__version__") = plssvm::version::version; @@ -98,8 +64,8 @@ PYBIND11_MODULE(plssvm, m) { })); // register PLSSVM base exception - static py::exception base_exception(m, "PLSSVMError"); - py::register_exception_translator([](std::exception_ptr p) { + static const py::exception base_exception(m, "PLSSVMError"); + py::register_exception_translator([](std::exception_ptr p) { // NOLINT: must be copied for each invocation try { if (p) { std::rethrow_exception(p); @@ -166,6 +132,7 @@ PYBIND11_MODULE(plssvm, m) { #endif py::module_ sklearn_like_svm_model = m.def_submodule("svm", "a module containing the sklearn like SVC and SVR implementations"); + init_sklearn_tags(sklearn_like_svm_model); init_sklearn_svc(sklearn_like_svm_model); init_sklearn_svr(sklearn_like_svm_model); } diff --git a/bindings/Python/model/classification_model.cpp b/bindings/Python/model/classification_model.cpp index c6bf180f9..9cc704b8d 100644 --- a/bindings/Python/model/classification_model.cpp +++ b/bindings/Python/model/classification_model.cpp @@ -13,15 +13,18 @@ #include "plssvm/matrix.hpp" // plssvm::aos_matrix #include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings #include "bindings/Python/model/variant_wrapper.hpp" // 
plssvm::bindings::python::util::classification_model_wrapper -#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // NOLINT: a custom Pybind11 type caster for a plssvm::mpi::communicator #include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{python_type_name_mapping, create_instance, vector_to_pyarray} #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::kw_only, py::array, py::list -#include "pybind11/pytypes.h" // py::type -#include "pybind11/stl.h" // support for STL types: std::vector +#include "pybind11/cast.h" // py::arg, py::kw_only, py::cast +#include "pybind11/numpy.h" // py::array +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init +#include "pybind11/pytypes.h" // py::type py::list +#include "pybind11/stl.h" // NOLINT: support for STL types #include // std::make_unique #include // std::optional, std::make_optional, std::nullopt @@ -35,19 +38,18 @@ void init_classification_model(py::module_ &m) { using plssvm::bindings::python::util::classification_model_wrapper; py::class_(m, "ClassificationModel", "Implements a class encapsulating the result of a call to the C-SVC fit function. 
A model is used to predict the labels of a new data set.") - .def(py::init([](const std::string &filename, const std::optional type, plssvm::mpi::communicator comm) { + .def(py::init([](const std::string &filename, const std::optional &type, plssvm::mpi::communicator comm) { if (type.has_value()) { return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), filename)); - } else { - return std::make_unique(plssvm::classification_model{ std::move(comm), filename }); } + return std::make_unique(plssvm::classification_model{ std::move(comm), filename }); }), "load a previously learned classification model from a file", py::arg("filename"), py::kw_only(), py::arg("type") = std::nullopt, py::arg("comm") = plssvm::mpi::communicator{}) - .def("save", [](const classification_model_wrapper &self, const std::string &filename) { return std::visit([&filename](auto &&model) { model.save(filename); }, self.model); }, "save the current model to a file", py::arg("filename")) + .def("save", [](const classification_model_wrapper &self, const std::string &filename) { std::visit([&filename](auto &&model) { model.save(filename); }, self.model); }, "save the current model to a file", py::arg("filename")) .def("num_support_vectors", [](const classification_model_wrapper &self) { return std::visit([](auto &&model) { return model.num_support_vectors(); }, self.model); }, "the number of support vectors (note: all training points become support vectors for LS-SVMs)") .def("num_features", [](const classification_model_wrapper &self) { return std::visit([](auto &&model) { return model.num_features(); }, self.model); }, "the number of features of the support vectors") .def("get_params", [](const classification_model_wrapper &self) { return std::visit([](auto &&model) { return model.get_params(); }, self.model); }, "the C-SVC hyper-parameters used to learn this model") @@ -57,9 +59,8 @@ void init_classification_model(py::module_ &m) { return 
std::visit([](auto &&model) -> std::optional { if (model.labels().has_value()) { return std::make_optional(plssvm::bindings::python::util::vector_to_pyarray(model.labels()->get())); - } else { - return std::nullopt; } + return std::nullopt; }, self.model); }, "the labels") .def("weights", [](const classification_model_wrapper &self) { return std::visit([](auto &&model) { diff --git a/bindings/Python/model/regression_model.cpp b/bindings/Python/model/regression_model.cpp index 5c22f6147..6cbbdfbbf 100644 --- a/bindings/Python/model/regression_model.cpp +++ b/bindings/Python/model/regression_model.cpp @@ -13,15 +13,18 @@ #include "plssvm/matrix.hpp" // plssvm::aos_matrix #include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings #include "bindings/Python/model/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_model_wrapper -#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // NOLINT: a custom Pybind11 type caster for a plssvm::mpi::communicator #include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{python_type_name_mapping, create_instance, vector_to_pyarray} #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::kw_only, py::array, py::list +#include "pybind11/cast.h" // py::cast, py::arg +#include "pybind11/numpy.h" // py::array +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::kw_only, py::array, py::list #include "pybind11/pytypes.h" // py::type -#include "pybind11/stl.h" // support for STL types: std::vector +#include "pybind11/stl.h" // NOLINT: support for STL types: std::vector #include // std::make_unique #include // std::optional, 
std::make_optional, std::nullopt @@ -35,19 +38,18 @@ void init_regression_model(py::module_ &m) { using plssvm::bindings::python::util::regression_model_wrapper; py::class_(m, "RegressionModel", "Implements a class encapsulating the result of a call to the C-SVR fit function. A model is used to predict the labels of a new data set.") - .def(py::init([](const std::string &filename, const std::optional type, plssvm::mpi::communicator comm) { + .def(py::init([](const std::string &filename, const std::optional &type, plssvm::mpi::communicator comm) { if (type.has_value()) { return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(comm), filename)); - } else { - return std::make_unique(plssvm::regression_model{ std::move(comm), filename }); } + return std::make_unique(plssvm::regression_model{ std::move(comm), filename }); }), "load a previously learned regression model from a file", py::arg("filename"), py::kw_only(), py::arg("type") = std::nullopt, py::arg("comm") = plssvm::mpi::communicator{}) - .def("save", [](const regression_model_wrapper &self, const std::string &filename) { return std::visit([&filename](auto &&model) { model.save(filename); }, self.model); }, "save the current model to a file", py::arg("filename")) + .def("save", [](const regression_model_wrapper &self, const std::string &filename) { std::visit([&filename](auto &&model) { model.save(filename); }, self.model); }, "save the current model to a file", py::arg("filename")) .def("num_support_vectors", [](const regression_model_wrapper &self) { return std::visit([](auto &&model) { return model.num_support_vectors(); }, self.model); }, "the number of support vectors (note: all training points become support vectors for LS-SVMs)") .def("num_features", [](const regression_model_wrapper &self) { return std::visit([](auto &&model) { return model.num_features(); }, self.model); }, "the number of features of the support vectors") .def("get_params", [](const 
regression_model_wrapper &self) { return std::visit([](auto &&model) { return model.get_params(); }, self.model); }, "the C-SVR hyper-parameters used to learn this model") @@ -57,9 +59,8 @@ void init_regression_model(py::module_ &m) { return std::visit([](auto &&model) -> std::optional { if (model.labels().has_value()) { return std::make_optional(plssvm::bindings::python::util::vector_to_pyarray(model.labels()->get())); - } else { - return std::nullopt; } + return std::nullopt; }, self.model); }, "the labels") .def("weights", [](const regression_model_wrapper &self) { return std::visit([](auto &&model) { diff --git a/bindings/Python/model/variant_wrapper.hpp b/bindings/Python/model/variant_wrapper.hpp index 908a058af..77f0734ff 100644 --- a/bindings/Python/model/variant_wrapper.hpp +++ b/bindings/Python/model/variant_wrapper.hpp @@ -9,8 +9,8 @@ * @brief Variant wrapper structs around PLSSVM classification and regression models. Used that we don't have to expose templates to the Python bindings. 
*/ -#ifndef PLSSVM_BINDINGS_PYTHON_MODEL_WRAPPER_HPP_ -#define PLSSVM_BINDINGS_PYTHON_MODEL_WRAPPER_HPP_ +#ifndef PLSSVM_BINDINGS_PYTHON_MODEL_VARIANT_WRAPPER_HPP_ +#define PLSSVM_BINDINGS_PYTHON_MODEL_VARIANT_WRAPPER_HPP_ #pragma once #include "plssvm/model/classification_model.hpp" // plssvm::classification_model @@ -94,4 +94,4 @@ struct regression_model_wrapper { } // namespace plssvm::bindings::python::util -#endif // PLSSVM_BINDINGS_PYTHON_MODEL_WRAPPER_HPP_ +#endif // PLSSVM_BINDINGS_PYTHON_MODEL_VARIANT_WRAPPER_HPP_ diff --git a/bindings/Python/parameter.cpp b/bindings/Python/parameter.cpp index bd419e06f..2820b6d91 100644 --- a/bindings/Python/parameter.cpp +++ b/bindings/Python/parameter.cpp @@ -12,10 +12,17 @@ #include "plssvm/gamma.hpp" // plssvm::gamma_type #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings + #include "fmt/format.h" // fmt::format +#include "pybind11/cast.h" // py::make_tuple #include "pybind11/operators.h" // support for operators -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::return_value_policy, py::self -#include "pybind11/stl.h" // support for STL types +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::self, py::pickle, py::return_value_policy +#include "pybind11/pytypes.h" // py::tuple +#include "pybind11/stl.h" // NOLINT: support for STL types + +#include // std::size_t +#include // std::runtime_error namespace py = pybind11; @@ -64,8 +71,8 @@ void init_parameter(py::module_ &m) { py::return_value_policy::reference, "change the cost parameter for the C-SVM") .def("equivalent", &plssvm::parameter::equivalent, "check whether two parameter objects are equivalent, i.e., the SVM hyper-parameters important for the current 'kernel_type' are the same") - .def(py::self == py::self, "check whether two parameter objects are identical") - .def(py::self 
!= py::self, "check whether two parameter objects are different") + .def(py::self == py::self, "check whether two parameter objects are identical") // NOLINT(misc-redundant-expression): the idiomatic Pybind11 way + .def(py::self != py::self, "check whether two parameter objects are different") // NOLINT(misc-redundant-expression): the idiomatic Pybind11 way .def("__repr__", [](const plssvm::parameter &self) { return fmt::format("", self.kernel_type, @@ -73,7 +80,24 @@ void init_parameter(py::module_ &m) { self.gamma, self.coef0, self.cost); - }); + }) + .def(py::pickle( + // clang-format off + [](const plssvm::parameter &self) { // __getstate__ + // return a tuple that fully encodes the state of the object + return py::make_tuple(self.kernel_type, self.degree, self.gamma, self.coef0, self.cost); + }, + [](py::tuple t) { // NOLINT: __setstate__ + constexpr std::size_t num_member_variables_of_plssvm_parameter = 5; + if (t.size() != num_member_variables_of_plssvm_parameter) { + throw std::runtime_error{ "Invalid state!" 
}; + } + // create a new C++ instance + return plssvm::parameter{ t[0].cast(), t[1].cast(), t[2].cast(), t[3].cast(), t[4].cast() }; + } + ) + // clang-format on + ); // bind free functions m.def("equivalent", &plssvm::equivalent, "check whether two parameter objects are equivalent, i.e., the SVM hyper-parameters important for the current 'kernel_type' are the same"); diff --git a/bindings/Python/regression_report.cpp b/bindings/Python/regression_report.cpp index 55dabfcb9..48a0728fd 100644 --- a/bindings/Python/regression_report.cpp +++ b/bindings/Python/regression_report.cpp @@ -10,13 +10,14 @@ #include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t -#include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_data_set_wrapper -#include "bindings/Python/type_caster/label_vector_wrapper_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::label_vector_wrapper +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings +#include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_data_set_wrapper +#include "bindings/Python/type_caster/label_vector_wrapper_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::label_vector_wrapper #include "fmt/format.h" // fmt::format #include "pybind11/pybind11.h" // py::module_, py::init, py::arg, py::kw_only, py::value_error #include "pybind11/pytypes.h" // py::object -#include "pybind11/stl.h" // support for STL types +#include "pybind11/stl.h" // NOLINT: support for STL types #include // std::string #include // std::visit, std::get @@ -49,8 +50,7 @@ void init_regression_report(py::module_ &m) { dict["r2_score"] = metrics.r2_score; dict["squared_correlation_coefficient"] = metrics.squared_correlation_coefficient; return dict; - } else { - return py::str(fmt::format("{}", report)); } + return 
py::str(fmt::format("{}", report)); }, y_true.labels); }, "create a new regression report by calculating all metrics between the correct and predicted labels", py::arg("y_true"), py::arg("y_pred"), py::kw_only(), py::arg("force_finite") = true, py::arg("output_dict") = false); } diff --git a/bindings/Python/sklearn_like/svc.cpp b/bindings/Python/sklearn_like/svc.cpp index 0c1e53f2e..edeb5ebe7 100644 --- a/bindings/Python/sklearn_like/svc.cpp +++ b/bindings/Python/sklearn_like/svc.cpp @@ -7,7 +7,7 @@ */ #include "plssvm/classification_types.hpp" // plssvm::classification_type -#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/constants.hpp" // plssvm::real_type, plssvm::DEFAULT_EPSILON #include "plssvm/csvm_factory.hpp" // plssvm::make_csvc #include "plssvm/data_set/classification_data_set.hpp" // plssvm::classification_data_set #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT @@ -20,30 +20,33 @@ #include "plssvm/svm/csvc.hpp" // plssvm::csvc #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level, plssvm::verbosity -#include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::classification_data_set_wrapper -#include "bindings/Python/model/variant_wrapper.hpp" // plssvm::bindings::python::util::classification_model_wrapper -#include "bindings/Python/type_caster/label_vector_wrapper_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::label_vector_wrapper -#include "bindings/Python/type_caster/matrix_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::matrix -#include "bindings/Python/type_caster/matrix_wrapper_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::matrix_wrapper -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, vector_to_pyarray} - -#include "fmt/format.h" // fmt::format -#include "fmt/ranges.h" // fmt::join -#include "pybind11/cast.h" // py::cast -#include 
"pybind11/numpy.h" // support for STL types -#include "pybind11/operators.h" // support for operators -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::return_value_policy, py::self, py::dynamic_attr, py::value_error, py::attribute_error -#include "pybind11/pytypes.h" // py::dict, py::kwargs, py::str -#include "pybind11/stl.h" // support for STL types +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings +#include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::classification_data_set_wrapper +#include "bindings/Python/model/variant_wrapper.hpp" // plssvm::bindings::python::util::classification_model_wrapper +#include "bindings/Python/sklearn_like/tags.hpp" // Tags, TargetTags, TransformerTags, ClassifierTags, RegressorTags, InputTags +#include "bindings/Python/type_caster/label_vector_wrapper_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::label_vector_wrapper +#include "bindings/Python/type_caster/matrix_type_caster.hpp" // NOLINT: a custom Pybind11 type caster for a plssvm::matrix +#include "bindings/Python/type_caster/matrix_wrapper_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::matrix_wrapper +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, vector_to_pyarray} + +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join +#include "pybind11/buffer_info.h" // py::buffer_info +#include "pybind11/cast.h" // py::cast, py::arg +#include "pybind11/numpy.h" // support for STL types +#include "pybind11/operators.h" // NOLINT: support for operators +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::return_value_policy, py::self, py::dynamic_attr, py::value_error, py::attribute_error, py::tuple, py::pickle +#include "pybind11/pytypes.h" // py::dict, py::kwargs, py::str 
+#include "pybind11/stl.h" // NOLINT: support for STL types #include // std::fill #include // std::size_t #include // std::int32_t -#include // fixed-width integers #include // std::exception #include // std::map #include // std::unique_ptr, std::make_unique #include // std::optional, std::nullopt +#include // std::runtime_error #include // std::string #include // std::make_tuple, std::ignore #include // std::move @@ -145,9 +148,10 @@ struct svc { /** * @brief Return the currently used params. * @details Necessary for the same Python function and also the string representation. + * @params[in] deep_copy *unused* * @return a Python dictionary containing the used parameter (`[[nodiscard]]`) */ - [[nodiscard]] py::dict get_params(const bool) const { + [[nodiscard]] py::dict get_params([[maybe_unused]] const bool deep_copy) const { PLSSVM_ASSERT(svm_ != nullptr, "svm_ may not be a nullptr! Maybe you forgot to initialize it?"); const plssvm::parameter params = svm_->get_params(); @@ -189,7 +193,7 @@ struct svc { std::map> indices_per_class{}; // init index-map map - for (const label_type &label : model.classes()) { + for (const label_type &label : model.classes()) { // NOLINT(performance-implicit-conversion-in-loop): the types ARE identical indices_per_class.insert({ label, std::vector{} }); } // sort the indices into the respective bucket based on their associated class @@ -207,24 +211,24 @@ struct svc { *model_); } - /// Pointer to the the stored PLSSVM C-SVC instance. - std::unique_ptr svm_{}; + /// Pointer to the stored PLSSVM C-SVC instance. + std::unique_ptr svm_; /// The CG termination criterion if provided. plssvm::real_type epsilon_{}; /// The maximum number of CG iterations if provided. - std::optional max_iter_{}; + std::optional max_iter_; /// The used classification type (or decision function shape). plssvm::classification_type classification_{}; /// The data type of the labels. 
- py::dtype py_dtype_{}; + py::dtype py_dtype_; /// Pointer to the classification data set wrapper (represents data sets with all possible label types). - std::unique_ptr data_{}; + std::unique_ptr data_; /// Pointer to the classification model wrapper (represents models with all possible label types). - std::unique_ptr model_{}; + std::unique_ptr model_; /// The name of the features. Can only be provided via a Pandas DataFrame. - std::optional> feature_names_{}; + std::optional> feature_names_; }; void init_sklearn_svc(py::module_ &m) { @@ -263,7 +267,7 @@ void init_sklearn_svc(py::module_ &m) { py::arg("coef0") = 0.0, // py::arg("shrinking") = true, // py::arg("probability") = false, - py::arg("tol") = 1e-10, + py::arg("tol") = plssvm::DEFAULT_EPSILON, // py::arg("cache_size") = 200, // py::arg("class_weight") = py::none{}, py::arg("verbose") = false, @@ -286,7 +290,7 @@ void init_sklearn_svc(py::module_ &m) { const auto size = static_cast(std::visit([](auto &&model) { return model.num_classes(); }, *self.model_)); py::array_t py_array(size); const py::buffer_info buffer = py_array.request(); - auto ptr = static_cast(buffer.ptr); + auto *ptr = static_cast(buffer.ptr); std::fill(ptr, ptr + size, plssvm::real_type{ 1.0 }); return py_array; }, "Multipliers of parameter C for each class. ndarray of shape (n_classes,)") .def_property_readonly("classes_", [](const svc &self) -> py::array { @@ -382,7 +386,7 @@ void init_sklearn_svc(py::module_ &m) { } // convert 2D vector back to plssvm::matrix - return py::cast(plssvm::aos_matrix{ std::move(sorted_sv) }); }, "Support vectors. ndarray of shape (n_SV, n_features)") + return py::cast(plssvm::aos_matrix{ sorted_sv }); }, "Support vectors. 
ndarray of shape (n_SV, n_features)") .def_property_readonly("n_support_", [](const svc &self) -> py::array { if (self.model_ == nullptr) { throw py::attribute_error{ "'SVC' object has no attribute 'n_support_'" }; @@ -393,7 +397,7 @@ void init_sklearn_svc(py::module_ &m) { std::map occurrences{}; // init count map - for (const label_type &label : model.classes()) { + for (const label_type &label : model.classes()) { // NOLINT(performance-implicit-conversion-in-loop): the types ARE identical occurrences.insert({ label, std::int32_t{ 0 } }); } // count occurrences @@ -449,9 +453,8 @@ void init_sklearn_svc(py::module_ &m) { reduced_votes[i] = -votes(i, 0); } return plssvm::bindings::python::util::vector_to_pyarray(reduced_votes); - } else { - return py::cast(votes); } + return py::cast(votes); } case plssvm::classification_type::oao: { @@ -481,12 +484,12 @@ void init_sklearn_svc(py::module_ &m) { if (num_classes == 2) { // no special assembly needed in binary case return model.support_vectors(); - } else { - // note: if this is changed, it must also be changed in the libsvm_model_parsing.hpp in the calculate_alpha_idx function!!! - // order the indices in increasing order - plssvm::soa_matrix temp{ plssvm::shape{ num_data_points_in_sub_matrix, num_features }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } }; - std::vector sorted_indices(num_data_points_in_sub_matrix); - std::merge(index_sets[i].cbegin(), index_sets[i].cend(), index_sets[j].cbegin(), index_sets[j].cend(), sorted_indices.begin()); + } + // note: if this is changed, it must also be changed in the libsvm_model_parsing.hpp in the calculate_alpha_idx function!!! 
+ // order the indices in increasing order + plssvm::soa_matrix temp{ plssvm::shape{ num_data_points_in_sub_matrix, num_features }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } }; + std::vector sorted_indices(num_data_points_in_sub_matrix); + std::merge(index_sets[i].cbegin(), index_sets[i].cend(), index_sets[j].cbegin(), index_sets[j].cend(), sorted_indices.begin()); // copy the support vectors to the binary support vectors // NOTE: it seems that MSVC doesn't like the collapse clause inside a lambda function #if defined(_MSC_VER) @@ -494,13 +497,12 @@ void init_sklearn_svc(py::module_ &m) { #else #pragma omp parallel for collapse(2) #endif - for (std::size_t si = 0; si < num_data_points_in_sub_matrix; ++si) { - for (std::size_t dim = 0; dim < num_features; ++dim) { - temp(si, dim) = model.support_vectors()(sorted_indices[si], dim); - } + for (std::size_t si = 0; si < num_data_points_in_sub_matrix; ++si) { + for (std::size_t dim = 0; dim < num_features; ++dim) { + temp(si, dim) = model.support_vectors()(sorted_indices[si], dim); } - return temp; } + return temp; }(); // we don't use the w optimization for the linear kernel here due to code simplicity @@ -525,9 +527,8 @@ void init_sklearn_svc(py::module_ &m) { votes_access(pp, pos) *= plssvm::real_type{ -1.0 }; } return votes.reshape(py::array::ShapeContainer{ votes.size() }); - } else { - return votes; } + return votes; } } // unreachable @@ -554,7 +555,7 @@ void init_sklearn_svc(py::module_ &m) { using possible_model_types = typename svc::possible_model_types; // create the data set to fit - plssvm::classification_data_set train_data{ std::move(data.matrix), std::move(labels_vector) }; + plssvm::classification_data_set train_data{ std::move(data.matrix), std::forward(labels_vector) }; // fit the model if (self.max_iter_.has_value()) { @@ -590,8 +591,8 @@ void init_sklearn_svc(py::module_ &m) { // predict the data return plssvm::bindings::python::util::vector_to_pyarray(self.svm_->predict(model, 
data_to_predict)); }, *self.model_); }, "Perform classification on samples in X.", py::arg("X")) - .def("predict_log_proba", [](const svc &, py::array_t) { throw py::attribute_error{ "'SVC' object has no function 'predict_log_proba' (not implemented)" }; }, "Compute log probabilities of possible outcomes for samples in X.", py::arg("X")) - .def("predict_proba", [](const svc &, py::array_t) { throw py::attribute_error{ "'SVC' object has no function 'predict_proba' (not implemented)" }; }, "Compute probabilities of possible outcomes for samples in X.", py::arg("X")) + .def("predict_log_proba", [](const svc &, const py::array_t &) { throw py::attribute_error{ "'SVC' object has no function 'predict_log_proba' (not implemented)" }; }, "Compute log probabilities of possible outcomes for samples in X.", py::arg("X")) + .def("predict_proba", [](const svc &, const py::array_t &) { throw py::attribute_error{ "'SVC' object has no function 'predict_proba' (not implemented)" }; }, "Compute probabilities of possible outcomes for samples in X.", py::arg("X")) .def("score", [](svc &self, plssvm::soa_matrix data, plssvm::bindings::python::util::label_vector_wrapper labels, const std::optional> &sample_weight) -> plssvm::real_type { PLSSVM_ASSERT(self.svm_ != nullptr, "svm_ may not be a nullptr! 
Maybe you forgot to initialize it?"); // sanity check parameter @@ -607,7 +608,7 @@ void init_sklearn_svc(py::module_ &m) { // get the label types using label_type = typename plssvm::detail::remove_cvref_t::value_type; // create the data set to score - const plssvm::classification_data_set data_to_score{ std::move(data), std::move(labels_vector) }; + const plssvm::classification_data_set data_to_score{ std::move(data), std::forward(labels_vector) }; // score the data try { return self.svm_->score(std::get>(*self.model_), data_to_score); @@ -698,10 +699,21 @@ void init_sklearn_svc(py::module_ &m) { new_svc.max_iter_ = self.max_iter_; new_svc.classification_ = self.classification_; return new_svc; }, "Clone the estimator.") + .def("__sklearn_tags__", [](const svc &self) -> Tags { + Tags sklearn_tags{}; + + // set non-default values + sklearn_tags.estimator_type = "classifier"; + sklearn_tags.target_tags.one_d_labels = true; + sklearn_tags.classifier_tags = ClassifierTags{}; + sklearn_tags.input_tags.sparse = true; + sklearn_tags.input_tags.positive_only = self.svm_->get_params().kernel_type == plssvm::kernel_function_type::chi_squared; + + return sklearn_tags; }, "Set sklearn tags internally used for estimators.") .def("__repr__", [](const svc &self) { // get the currently used parameters - py::dict used_params = self.get_params(true); - py::dict default_params = svc{}.get_params(true); + const py::dict used_params = self.get_params(true); + const py::dict default_params = svc{}.get_params(true); std::vector non_default_values{}; @@ -723,5 +735,21 @@ void init_sklearn_svc(py::module_ &m) { } } - return fmt::format("plssvm.svm.SVC({})", fmt::join(non_default_values, ", ")); }, "Print the SVC showing all non-default parameters."); + return fmt::format("plssvm.svm.SVC({})", fmt::join(non_default_values, ", ")); }, "Print the SVC showing all non-default parameters.") + .def(py::pickle( + // clang-format off + [](const svc &self) { // __getstate__ + // return a tuple that 
fully encodes the state of the object + return py::make_tuple(self.svm_->get_params(), self.epsilon_, self.max_iter_, self.classification_); + }, + [](py::tuple t) { // NOLINT: __setstate__ + if (t.size() != 4) { + throw std::runtime_error{ "Invalid state!" }; + } + // create a new C++ instance + return svc{ t[0].cast(), t[1].cast(), t[2].cast>(), t[3].cast() }; + } + ) + // clang-format on + ); } diff --git a/bindings/Python/sklearn_like/svr.cpp b/bindings/Python/sklearn_like/svr.cpp index 0ebf68217..89017226a 100644 --- a/bindings/Python/sklearn_like/svr.cpp +++ b/bindings/Python/sklearn_like/svr.cpp @@ -6,9 +6,10 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/constants.hpp" // plssvm::real_type, plssvm::DEFAULT_EPSILON #include "plssvm/csvm_factory.hpp" // plssvm::make_csvr #include "plssvm/data_set/regression_data_set.hpp" // plssvm::regression_data_set +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t #include "plssvm/gamma.hpp" // plssvm::gamma_coefficient_type, plssvm::gamma_type #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type @@ -18,31 +19,35 @@ #include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level, plssvm::verbosity -#include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_data_set_wrapper -#include "bindings/Python/model/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_model_wrapper -#include "bindings/Python/type_caster/label_vector_wrapper_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::label_vector_wrapper -#include "bindings/Python/type_caster/matrix_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::matrix -#include 
"bindings/Python/type_caster/matrix_wrapper_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::matrix_wrapper -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, vector_to_pyarray} +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings +#include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_data_set_wrapper +#include "bindings/Python/model/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_model_wrapper +#include "bindings/Python/sklearn_like/tags.hpp" // Tags, TargetTags, TransformerTags, ClassifierTags, RegressorTags, InputTags +#include "bindings/Python/type_caster/label_vector_wrapper_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::label_vector_wrapper +#include "bindings/Python/type_caster/matrix_type_caster.hpp" // NOLINT: a custom Pybind11 type caster for a plssvm::matrix +#include "bindings/Python/type_caster/matrix_wrapper_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::matrix_wrapper +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, vector_to_pyarray} #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join -#include "pybind11/cast.h" // py::cast +#include "pybind11/cast.h" // py::cast, py::arg #include "pybind11/numpy.h" // support for STL types -#include "pybind11/operators.h" // support for operators -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::return_value_policy, py::self, py::dynamic_attr, py::value_error, py::attribute_error +#include "pybind11/operators.h" // NOLINT: support for operators +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::return_value_policy, py::self, py::dynamic_attr, py::value_error, py::attribute_error, 
py::tuple, py::pickle #include "pybind11/pytypes.h" // py::dict, py::kwargs, py::str -#include "pybind11/stl.h" // support for STL types - -#include // std::int32_t -#include // std::unique_ptr, std::make_unique -#include // std::iota -#include // std::optional, std::nullopt -#include // std::string -#include // std::make_tuple -#include // std::move -#include // std::holds_alternative -#include // std::vector +#include "pybind11/stl.h" // NOLINT: support for STL types + +#include // std::int32_t +#include // std::exception +#include // std::unique_ptr, std::make_unique +#include // std::iota +#include // std::optional, std::nullopt +#include // std::runtime_error +#include // std::string +#include // std::make_tuple +#include // std::move, std::forward +#include // std::holds_alternative +#include // std::vector namespace py = pybind11; @@ -97,9 +102,10 @@ struct svr { /** * @brief Return the currently used params. * @details Necessary for the same Python function and also the string representation. + * @params[in] deep_copy *unused* * @return a Python dictionary containing the used parameter (`[[nodiscard]]`) */ - [[nodiscard]] py::dict get_params(const bool) const { + [[nodiscard]] py::dict get_params([[maybe_unused]] const bool deep_copy) const { const plssvm::parameter params = svm_->get_params(); // fill a Python dictionary with the supported keys and values @@ -125,21 +131,21 @@ struct svr { } /// Pointer to the the stored PLSSVM C-SVR instance. - std::unique_ptr svm_{}; + std::unique_ptr svm_; /// The CG termination criterion if provided. plssvm::real_type epsilon_{}; /// The maximum number of CG iterations if provided. - std::optional max_iter_{}; + std::optional max_iter_; /// The data type of the labels. - py::dtype py_dtype_{}; + py::dtype py_dtype_; /// Pointer to the regression data set wrapper (represents data sets with all possible label types). 
- std::unique_ptr data_{}; + std::unique_ptr data_; /// Pointer to the regression model wrapper (represents models with all possible label types). - std::unique_ptr model_{}; + std::unique_ptr model_; /// The name of the features. Can only be provided via a Pandas DataFrame. - std::optional> feature_names_{}; + std::optional> feature_names_; }; void init_sklearn_svr(py::module_ &m) { @@ -175,7 +181,7 @@ void init_sklearn_svr(py::module_ &m) { py::arg("degree") = 3, py::arg("gamma") = plssvm::gamma_coefficient_type::scale, py::arg("coef0") = 0.0, - py::arg("tol") = 1e-10, + py::arg("tol") = plssvm::DEFAULT_EPSILON, py::arg("C") = 1.0, // py::arg("epsilon") = 0.1, // py::arg("shrinking") = true, // true @@ -283,7 +289,7 @@ void init_sklearn_svr(py::module_ &m) { using possible_model_types = typename svr::possible_model_types; // create the data set to fit - plssvm::regression_data_set train_data{ std::move(data.matrix), std::move(labels_vector) }; + plssvm::regression_data_set train_data{ std::move(data.matrix), std::forward(labels_vector) }; // fit the model using potentially provided keyword arguments if (self.max_iter_.has_value()) { @@ -330,12 +336,12 @@ void init_sklearn_svr(py::module_ &m) { // get the label types using label_type = typename plssvm::detail::remove_cvref_t::value_type; // create the data set to score - const plssvm::regression_data_set data_to_score{ std::move(data), std::move(labels_vector) }; + const plssvm::regression_data_set data_to_score{ std::move(data), std::forward(labels_vector) }; // score the data try { return self.svm_->score(std::get>(*self.model_), data_to_score); } catch (const std::exception &) { - throw py::value_error{ fmt::format("The dtype of the labels to score is \"{}\", but the model was fitted with \"{}\". 
Please use the same types for fit and score!", labels.dtype.attr("name").cast(), self.py_dtype_.attr("name").cast()) }; + throw py::value_error{ fmt::format(R"(The dtype of the labels to score is "{}", but the model was fitted with "{}". Please use the same types for fit and score!)", labels.dtype.attr("name").cast(), self.py_dtype_.attr("name").cast()) }; } }, labels.labels); }, "Return the mean accuracy on the given test data and labels.", py::arg("X"), py::arg("y"), py::pos_only(), py::arg("sample_weight") = std::nullopt) .def("set_fit_request", [](const svr &) { throw py::attribute_error{ "'SVR' object has no function 'set_fit_request' (not implemented)" }; }, "Request metadata passed to the fit method.") @@ -407,10 +413,21 @@ void init_sklearn_svr(py::module_ &m) { new_svr.epsilon_ = self.epsilon_; new_svr.max_iter_ = self.max_iter_; return new_svr; }, "Clone the estimator.") + .def("__sklearn_tags__", [](const svr &self) -> Tags { + Tags sklearn_tags{}; + + // set non-default values + sklearn_tags.estimator_type = "regressor"; + sklearn_tags.target_tags.one_d_labels = true; + sklearn_tags.regressor_tags = RegressorTags{}; + sklearn_tags.input_tags.sparse = true; + sklearn_tags.input_tags.positive_only = self.svm_->get_params().kernel_type == plssvm::kernel_function_type::chi_squared; + + return sklearn_tags; }, "Set sklearn tags internally used for estimators.") .def("__repr__", [](const svr &self) { // get the currently used parameters - py::dict used_params = self.get_params(true); - py::dict default_params = svr{}.get_params(true); + const py::dict used_params = self.get_params(true); + const py::dict default_params = svr{}.get_params(true); std::vector non_default_values{}; @@ -432,5 +449,22 @@ void init_sklearn_svr(py::module_ &m) { } } - return fmt::format("plssvm.svm.SVR({})", fmt::join(non_default_values, ", ")); }, "Print the SVR showing all non-default parameters."); + return fmt::format("plssvm.svm.SVR({})", fmt::join(non_default_values, ", ")); }, 
"Print the SVR showing all non-default parameters.") + .def(py::pickle( + // clang-format off + [](const svr &self) { // __getstate__ + // return a tuple that fully encodes the state of the object + return py::make_tuple(self.svm_->get_params(), self.epsilon_, self.max_iter_); + }, + [](py::tuple t) { // NOLINT: __setstate__ + if (t.size() != 3) { + throw std::runtime_error{ "Invalid state!" }; + } + // create a new C++ instance + return svr{ t[0].cast(), t[1].cast(), t[2].cast>() }; + } + ) + // clang-format on + ); + ; } diff --git a/bindings/Python/sklearn_like/tags.cpp b/bindings/Python/sklearn_like/tags.cpp new file mode 100644 index 000000000..16fa3e0d3 --- /dev/null +++ b/bindings/Python/sklearn_like/tags.cpp @@ -0,0 +1,293 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "bindings/Python/sklearn_like/tags.hpp" // Tags, TargetTags, TransformerTags, ClassifierTags, RegressorTags, InputTags + +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings + +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join +#include "pybind11/cast.h" // py::cast, py::repr +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init +#include "pybind11/stl.h" // NOLINT: bind STL types + +#include // std::string + +namespace { + +[[nodiscard]] std::string bool_as_python_string(const bool b) { + return b ? 
"True" : "False"; +} + +} // namespace + +void init_sklearn_tags(py::module_ &m) { + // TargetTags + py::class_(m, "TargetTags") + .def(py::init<>()) + .def_readwrite("required", &TargetTags::required) + .def_readwrite("one_d_labels", &TargetTags::one_d_labels) + .def_readwrite("two_d_labels", &TargetTags::two_d_labels) + .def_readwrite("positive_only", &TargetTags::positive_only) + .def_readwrite("multi_output", &TargetTags::multi_output) + .def_readwrite("single_output", &TargetTags::single_output) + .def("__repr__", [](const TargetTags &t) { + return fmt::format( + "TargetTags(required={}, one_d_labels={}, two_d_labels={}, " + "positive_only={}, multi_output={}, single_output={})", + bool_as_python_string(t.required), + bool_as_python_string(t.one_d_labels), + bool_as_python_string(t.two_d_labels), + bool_as_python_string(t.positive_only), + bool_as_python_string(t.multi_output), + bool_as_python_string(t.single_output)); + }) + .def(py::pickle( + // clang-format off + [](const TargetTags &self) { // __getstate__ + // return a tuple that fully encodes the state of the object + return py::make_tuple(self.required, self.one_d_labels, self.two_d_labels, self.positive_only, self.multi_output, self.single_output); + }, + [](py::tuple t) { // NOLINT: __setstate__ + if (t.size() != 6) { + throw std::runtime_error{ "Invalid TargetTags pickle state" }; + } + // create a new C++ instance + TargetTags tags; + tags.required = t[0].cast(); + tags.one_d_labels = t[1].cast(); + tags.two_d_labels = t[2].cast(); + tags.positive_only = t[3].cast(); + tags.multi_output = t[4].cast(); + tags.single_output = t[5].cast(); + return tags; + } + ) + // clang-format on + ); + + // TransformerTags + py::class_(m, "TransformerTags") + .def(py::init<>()) + .def_readwrite("preserves_dtype", &TransformerTags::preserves_dtype) + .def("__repr__", [](const TransformerTags &t) { + return fmt::format( + "TransformerTags(preserves_dtype=[{}])", + fmt::join(t.preserves_dtype, ", ")); + }) + 
.def(py::pickle( + // clang-format off + [](const TransformerTags &self) { // __getstate__ + // return a tuple that fully encodes the state of the object + return py::make_tuple(self.preserves_dtype); + }, + [](py::tuple t) { // NOLINT: __setstate__ + if (t.size() != 1) { + throw std::runtime_error{ "Invalid TransformerTags pickle state" }; + } + // create a new C++ instance + TransformerTags tags; + tags.preserves_dtype = t[0].cast>(); + return tags; + } + ) + // clang-format on + ); + + // ClassifierTags + py::class_(m, "ClassifierTags") + .def(py::init<>()) + .def_readwrite("poor_score", &ClassifierTags::poor_score) + .def_readwrite("multi_class", &ClassifierTags::multi_class) + .def_readwrite("multi_label", &ClassifierTags::multi_label) + .def("__repr__", [](const ClassifierTags &t) { + return fmt::format( + "ClassifierTags(poor_score={}, multi_class={}, multi_label={})", + bool_as_python_string(t.poor_score), + bool_as_python_string(t.multi_class), + bool_as_python_string(t.multi_label)); + }) + .def(py::pickle( + // clang-format off + [](const ClassifierTags &self) { // __getstate__ + // return a tuple that fully encodes the state of the object + return py::make_tuple(self.poor_score, self.multi_class, self.multi_label); + }, + [](py::tuple t) { // NOLINT: __setstate__ + if (t.size() != 3) { + throw std::runtime_error{ "Invalid ClassifierTags pickle state" }; + } + // create a new C++ instance + ClassifierTags tags; + tags.poor_score = t[0].cast(); + tags.multi_class = t[1].cast(); + tags.multi_label = t[2].cast(); + return tags; + } + ) + // clang-format on + ); + + // RegressorTags + py::class_(m, "RegressorTags") + .def(py::init<>()) + .def_readwrite("poor_score", &RegressorTags::poor_score) + .def("__repr__", [](const RegressorTags &t) { + return fmt::format( + "RegressorTags(poor_score={})", + bool_as_python_string(t.poor_score)); + }) + .def(py::pickle( + // clang-format off + [](const RegressorTags &self) { // __getstate__ + // return a tuple that 
fully encodes the state of the object + return py::make_tuple(self.poor_score); + }, + [](py::tuple t) { // NOLINT: __setstate__ + if (t.size() != 1) { + throw std::runtime_error{ "Invalid RegressorTags pickle state" }; + } + // create a new C++ instance + RegressorTags tags; + tags.poor_score = t[0].cast(); + return tags; + } + ) + // clang-format on + ); + + // InputTags + py::class_(m, "InputTags") + .def(py::init<>()) + .def_readwrite("one_d_array", &InputTags::one_d_array) + .def_readwrite("two_d_array", &InputTags::two_d_array) + .def_readwrite("three_d_array", &InputTags::three_d_array) + .def_readwrite("sparse", &InputTags::sparse) + .def_readwrite("categorical", &InputTags::categorical) + .def_readwrite("string", &InputTags::string) + .def_readwrite("dict", &InputTags::dict) + .def_readwrite("positive_only", &InputTags::positive_only) + .def_readwrite("allow_nan", &InputTags::allow_nan) + .def_readwrite("pairwise", &InputTags::pairwise) + .def("__repr__", [](const InputTags &t) { + return fmt::format( + "InputTags(one_d_array={}, two_d_array={}, three_d_array={}, " + "sparse={}, categorical={}, string={}, dict={}, " + "positive_only={}, allow_nan={}, pairwise={})", + bool_as_python_string(t.one_d_array), + bool_as_python_string(t.two_d_array), + bool_as_python_string(t.three_d_array), + bool_as_python_string(t.sparse), + bool_as_python_string(t.categorical), + bool_as_python_string(t.string), + bool_as_python_string(t.dict), + bool_as_python_string(t.positive_only), + bool_as_python_string(t.allow_nan), + bool_as_python_string(t.pairwise)); + }) + .def(py::pickle( + // clang-format off + [](const InputTags &self) { // __getstate__ + // return a tuple that fully encodes the state of the object + return py::make_tuple(self.one_d_array, self.two_d_array, self.three_d_array, self.sparse, self.categorical, + self.string, self.dict, self.positive_only, self.allow_nan, self.pairwise); + }, + [](py::tuple t) { // NOLINT: __setstate__ + if (t.size() != 10) { + 
throw std::runtime_error{ "Invalid InputTags pickle state" }; + } + // create a new C++ instance + InputTags tags; + tags.one_d_array = t[0].cast(); + tags.two_d_array = t[1].cast(); + tags.three_d_array = t[2].cast();; + tags.sparse = t[3].cast(); + tags.categorical = t[4].cast(); + tags.string = t[5].cast(); + tags.dict = t[6].cast(); + tags.positive_only = t[7].cast(); + tags.allow_nan = t[8].cast(); + tags.pairwise = t[9].cast(); + return tags; + } + ) + // clang-format on + ); + + // Tags (root object) + py::class_(m, "Tags") + .def(py::init<>()) + .def_readwrite("estimator_type", &Tags::estimator_type) + .def_readwrite("target_tags", &Tags::target_tags) + .def_readwrite("transformer_tags", &Tags::transformer_tags) + .def_readwrite("classifier_tags", &Tags::classifier_tags) + .def_readwrite("regressor_tags", &Tags::regressor_tags) + .def_readwrite("array_api_support", &Tags::array_api_support) + .def_readwrite("no_validation", &Tags::no_validation) + .def_readwrite("non_deterministic", &Tags::non_deterministic) + .def_readwrite("requires_fit", &Tags::requires_fit) + .def_readwrite("_skip_test", &Tags::_skip_test) + .def_readwrite("input_tags", &Tags::input_tags) + .def("__repr__", [](const Tags &t) { + const std::string estimator_type = t.estimator_type.has_value() + ? 
fmt::format("'{}'", t.estimator_type.value()) + : "None"; + const auto &tag_as_string_or_none = [](const auto &opt_tag) -> std::string { + if (!opt_tag.has_value()) { + return "None"; + } + return py::repr(py::cast(opt_tag.value())).template cast(); + }; + + return fmt::format( + "Tags(estimator_type={}, target_tags={}, transformer_tags={}, " + "classifier_tags={}, regressor_tags={}, array_api_support={}, " + "no_validation={}, non_deterministic={}, requires_fit={}, " + "_skip_test={}, input_tags={})", + estimator_type, + py::repr(py::cast(t.target_tags)).cast(), + tag_as_string_or_none(t.transformer_tags), + tag_as_string_or_none(t.classifier_tags), + tag_as_string_or_none(t.regressor_tags), + bool_as_python_string(t.array_api_support), + bool_as_python_string(t.no_validation), + bool_as_python_string(t.non_deterministic), + bool_as_python_string(t.requires_fit), + bool_as_python_string(t._skip_test), + py::repr(py::cast(t.input_tags)).cast()); + }) + .def(py::pickle( + // clang-format off + [](const Tags &self) { // __getstate__ + // return a tuple that fully encodes the state of the object + return py::make_tuple(self.estimator_type, self.target_tags, self.transformer_tags, self.classifier_tags, self.regressor_tags, + self.array_api_support, self.no_validation, self.non_deterministic, self.requires_fit, self._skip_test, self.input_tags); + }, + [](py::tuple t) { // NOLINT: __setstate__ + if (t.size() != 11) { + throw std::runtime_error{ "Invalid Tags pickle state" }; + } + // create a new C++ instance + Tags tags; + tags.estimator_type = t[0].cast>(); + tags.target_tags = t[1].cast(); + tags.transformer_tags = t[2].cast>(); + tags.classifier_tags = t[3].cast>(); + tags.regressor_tags = t[4].cast>(); + tags.array_api_support = t[5].cast(); + tags.no_validation = t[6].cast(); + tags.non_deterministic = t[7].cast(); + tags.requires_fit = t[8].cast(); + tags._skip_test = t[9].cast(); + tags.input_tags = t[10].cast(); + return tags; + } + ) + // clang-format on + ); 
+} diff --git a/bindings/Python/sklearn_like/tags.hpp b/bindings/Python/sklearn_like/tags.hpp new file mode 100644 index 000000000..94efb3099 --- /dev/null +++ b/bindings/Python/sklearn_like/tags.hpp @@ -0,0 +1,125 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Implements tags classes used for sklearn's `__sklearn_tags__` attribute. + */ + +#ifndef PLSSVM_BINDINGS_PYTHON_SKLEARN_LIKE_TAGS_HPP_ +#define PLSSVM_BINDINGS_PYTHON_SKLEARN_LIKE_TAGS_HPP_ +#pragma once + +#include // std::optional +#include // std::string +#include // std::vector + +/** + * @brief Tags for the target data. + */ +struct TargetTags { + /// Whether the estimator requires y to be passed to `fit`, `fit_predict`, or `fit_transform` methods. + bool required = true; + /// Whether the input is a 1D labels (y). + bool one_d_labels = false; + /// Whether the input is a 2D labels (y). + bool two_d_labels = false; + /// Whether the estimator requires a positive y (only applicable for regression). + bool positive_only = false; + /// Whether a regressor supports multi-target outputs or a classifier supports multi-class multi-output. + bool multi_output = false; + /// Whether the target can be single-output. This can be `false` if the estimator supports only multi-output cases. + bool single_output = true; +}; + +/** + * @brief Tags for the transformer. + */ +struct TransformerTags { + /// Applies only on transformers. + /// It corresponds to the data types which will be preserved such that `X_trans.dtype` is the same as `X.dtype` after calling `transformer.transform(X)`. + std::vector preserves_dtype{ "float64" }; +}; + +/** + * @brief Tags for the classifier. 
+ */ +struct ClassifierTags { + /// Whether the estimator fails to provide a “reasonable” test-set score, which currently for classification is an + /// accuracy of 0.83 on `make_blobs(n_samples=300, random_state=0)`. + bool poor_score = false; + /// Whether the classifier can handle multi-class classification. + bool multi_class = true; + /// Whether the classifier supports multi-label output: a data point can be predicted to belong to a variable number of classes. + bool multi_label = false; +}; + +/** + * @brief Tags for the regressor. + */ +struct RegressorTags { + /// Whether the estimator fails to provide a “reasonable” test-set score, which currently for regression is an R2 of 0.5 on + /// `make_regression(n_samples=200, n_features=10, n_informative=1, bias=5.0, noise=20, random_state=42)`. + bool poor_score = false; +}; + +/** + * @brief Tags for the input data. + */ +struct InputTags { + /// Whether the input can be a 1D array. + bool one_d_array = false; + /// Whether the input can be a 2D array. + bool two_d_array = true; + /// Whether the input can be a 3D array. + bool three_d_array = false; + /// Whether the input can be a sparse matrix. + bool sparse = false; + /// Whether the input can be categorical. + bool categorical = false; + /// Whether the input can be an array-like of strings. + bool string = false; + /// Whether the input can be a dictionary. + bool dict = false; + /// Whether the estimator requires positive X. + bool positive_only = false; + /// Whether the estimator supports data with missing values encoded as `np.nan`. + bool allow_nan = false; + /// This boolean attribute indicates whether the data(`X`), fit and similar methods consists of pairwise measures + /// over samples rather than a feature representation for each sample. It is usually `true` where an estimator has + /// a metric or affinity or kernel parameter with value "precomputed". + bool pairwise = false; +}; + +/** + * @brief Tags for the estimator. 
+ */ +struct Tags { + /// The type of the estimator. Can be one of: - “classifier” - “regressor” - “transformer” - “clusterer” - “outlier_detector” - “density_estimator” + std::optional estimator_type; + /// The target(y) tags. + TargetTags target_tags; + /// The transformer tags. + std::optional transformer_tags; + /// The classifier tags. + std::optional classifier_tags; + /// The regressor tags. + std::optional regressor_tags; + /// Whether the estimator supports Array API compatible inputs. + bool array_api_support = false; + /// Whether the estimator skips input-validation. This is only meant for stateless and dummy transformers! + bool no_validation = false; + /// Whether the estimator is not deterministic given a fixed `random_state`. + bool non_deterministic = false; + /// Whether the estimator requires to be fitted before calling one of `transform`, `predict`, `predict_proba`, or `decision_function`. + bool requires_fit = true; + /// Whether to skip common tests entirely. Don’t use this unless you have a very good reason. + bool _skip_test = false; + /// The input data(X) tags. 
+ InputTags input_tags; +}; + +#endif // PLSSVM_BINDINGS_PYTHON_SKLEARN_LIKE_TAGS_HPP_ diff --git a/bindings/Python/solver_types.cpp b/bindings/Python/solver_types.cpp index f8309fb4b..e0ed3557b 100644 --- a/bindings/Python/solver_types.cpp +++ b/bindings/Python/solver_types.cpp @@ -8,7 +8,8 @@ #include "plssvm/solver_types.hpp" // plssvm::solver_type -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion #include "pybind11/pybind11.h" // py::module_, py::enum_ @@ -16,7 +17,7 @@ namespace py = pybind11; void init_solver_types(py::module_ &m) { // bind enum class - py::enum_ py_enum(m, "SolverType", "Enum class for all possible solver types implemented in PLSSVM."); + py::enum_ py_enum(m, "SolverType", "enum.Enum", "Enum class for all possible solver types implemented in PLSSVM."); py_enum .value("AUTOMATIC", plssvm::solver_type::automatic, "the default solver type; depends on the available device and system memory") .value("CG_EXPLICIT", plssvm::solver_type::cg_explicit, "explicitly assemble the kernel matrix on the device") diff --git a/bindings/Python/svm/csvc.cpp b/bindings/Python/svm/csvc.cpp index 1e9bd6c58..1368725da 100644 --- a/bindings/Python/svm/csvc.cpp +++ b/bindings/Python/svm/csvc.cpp @@ -10,26 +10,28 @@ #include "plssvm/backend_types.hpp" // plssvm::backend_type #include "plssvm/classification_types.hpp" // plssvm::classification_type -#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/constants.hpp" // plssvm::real_type, plssvm::DEFAULT_EPSILON #include "plssvm/data_set/classification_data_set.hpp" // plssvm::classification_data_set #include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t #include "plssvm/gamma.hpp" // 
plssvm::gamma_type #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type -#include "plssvm/model/classification_model.hpp" // plssvm::classification_model #include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter, named arguments #include "plssvm/solver_types.hpp" // plssvm::solver_type #include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings #include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::classification_data_set_wrapper #include "bindings/Python/model/variant_wrapper.hpp" // plssvm::bindings::python::util::classification_model_wrapper #include "bindings/Python/svm/utility.hpp" // plssvm::bindings::python::util::assemble_csvm -#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // NOLINT: a custom Pybind11 type caster for a plssvm::mpi::communicator #include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{python_type_name_mapping, vector_to_pyarray} #include "fmt/format.h" // fmt::format -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::kw_only, py::kwargs, py::value_error -#include "pybind11/stl.h" // support for STL types: std::optional +#include "pybind11/cast.h" // py::arg +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::kw_only, py::value_error +#include "pybind11/pytypes.h" // py::kwargs +#include "pybind11/stl.h" // NOLINT: support for STL types: std::optional #include // std::exception #include // std::optional, std::nullopt @@ -81,16 +83,15 @@ void init_csvc(py::module_ &m) { plssvm::max_iter = max_iter.value(), plssvm::classification = classification, plssvm::solver = solver) }; - } else { - return 
classification_model_wrapper{ self.fit(data, - plssvm::epsilon = epsilon, - plssvm::classification = classification, - plssvm::solver = solver) }; } + return classification_model_wrapper{ self.fit(data, + plssvm::epsilon = epsilon, + plssvm::classification = classification, + plssvm::solver = solver) }; }, data_set.data_set); }, "fit a model using the current C-SVC on the provided data", py::arg("data"), py::kw_only(), - py::arg("epsilon") = plssvm::real_type{ 1e-10 }, + py::arg("epsilon") = plssvm::DEFAULT_EPSILON, py::arg("max_iter") = std::nullopt, py::arg("classification") = plssvm::classification_type::oaa, py::arg("solver") = plssvm::solver_type::automatic) diff --git a/bindings/Python/svm/csvm.cpp b/bindings/Python/svm/csvm.cpp index 78a1d4fb9..a4a091cf7 100644 --- a/bindings/Python/svm/csvm.cpp +++ b/bindings/Python/svm/csvm.cpp @@ -13,10 +13,13 @@ #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/parameter.hpp" // plssvm::parameter, named arguments -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::check_kwargs_for_correctness +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::check_kwargs_for_correctness +#include "pybind11/cast.h" // py::arg #include "pybind11/pybind11.h" // py::module_, py::class_, py::arg, py::kwargs, py::module_local -#include "pybind11/stl.h" // support for STL types: std::variant +#include "pybind11/pytypes.h" // py::kwargs +#include "pybind11/stl.h" // NOLINT: support for STL types: std::variant namespace py = pybind11; diff --git a/bindings/Python/svm/csvr.cpp b/bindings/Python/svm/csvr.cpp index 23dddb1c1..ee9c508b2 100644 --- a/bindings/Python/svm/csvr.cpp +++ b/bindings/Python/svm/csvr.cpp @@ -9,26 +9,27 @@ #include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/backend_types.hpp" // plssvm::backend_type -#include 
"plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/constants.hpp" // plssvm::real_type, plssvm::DEFAULT_EPSILON #include "plssvm/data_set/regression_data_set.hpp" // plssvm::regression_data_set #include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t #include "plssvm/gamma.hpp" // plssvm::gamma_type #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type -#include "plssvm/model/regression_model.hpp" // plssvm::regression_model #include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter, named arguments #include "plssvm/solver_types.hpp" // plssvm::solver_type #include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings #include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_data_set_wrapper #include "bindings/Python/model/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_model_wrapper #include "bindings/Python/svm/utility.hpp" // plssvm::bindings::python::util::assemble_csvm -#include "bindings/Python/type_caster/mpi_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::mpi::communicator +#include "bindings/Python/type_caster/mpi_type_caster.hpp" // NOLINT: a custom Pybind11 type caster for a plssvm::mpi::communicator #include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{python_type_name_mapping, vector_to_pyarray} #include "fmt/format.h" // fmt::format -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::kw_only, py::kwargs, py::value_error -#include "pybind11/stl.h" // support for STL types: std::optional +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::kw_only, py::value_error +#include "pybind11/pytypes.h" // py::kwargs +#include "pybind11/stl.h" // NOLINT: support for STL types: 
std::optional #include // std::exception #include // std::optional, std::nullopt @@ -79,15 +80,14 @@ void init_csvr(py::module_ &m) { plssvm::epsilon = epsilon, plssvm::max_iter = max_iter.value(), plssvm::solver = solver) }; - } else { - return regression_model_wrapper{ self.fit(data, - plssvm::epsilon = epsilon, - plssvm::solver = solver) }; } + return regression_model_wrapper{ self.fit(data, + plssvm::epsilon = epsilon, + plssvm::solver = solver) }; }, data_set.data_set); }, "fit a model using the current C-SVR on the provided data", py::arg("data"), py::kw_only(), - py::arg("epsilon") = plssvm::real_type{ 1e-10 }, + py::arg("epsilon") = plssvm::DEFAULT_EPSILON, py::arg("max_iter") = std::nullopt, py::arg("solver") = plssvm::solver_type::automatic) .def("predict", [](const plssvm::csvr &self, const regression_model_wrapper &trained_model, const regression_data_set_wrapper &data_set) { diff --git a/bindings/Python/svm/utility.hpp b/bindings/Python/svm/utility.hpp index 38019bf8b..3273b0673 100644 --- a/bindings/Python/svm/utility.hpp +++ b/bindings/Python/svm/utility.hpp @@ -13,14 +13,14 @@ #define PLSSVM_BINDINGS_PYTHON_SVM_UTILITY_HPP_ #pragma once -#include "plssvm/backend_types.hpp" // plssvm::backend_type -#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space -#include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::implementation_type -#include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type -#include "plssvm/csvm_factory.hpp" // plssvm::make_csvm -#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator -#include "plssvm/parameter.hpp" // plssvm::parameter, named arguments -#include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "plssvm/backend_types.hpp" // plssvm::backend_type +#include "plssvm/backends/Kokkos/execution_spaces.hpp" // plssvm::kokkos::execution_space +#include "plssvm/backends/SYCL/data_parallel_kernels.hpp" // 
plssvm::sycl::data_parallel_kernel +#include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::implementation_type +#include "plssvm/csvm_factory.hpp" // plssvm::make_csvm +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/parameter.hpp" // plssvm::parameter, named arguments +#include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::check_kwargs_for_correctness @@ -46,7 +46,7 @@ namespace plssvm::bindings::python::util { template [[nodiscard]] inline std::unique_ptr assemble_csvm(const plssvm::backend_type backend, const plssvm::target_platform target, const plssvm::parameter ¶ms, plssvm::mpi::communicator comm, const py::kwargs &optional_args) { // check keyword arguments - plssvm::bindings::python::util::check_kwargs_for_correctness(optional_args, { "foo", "sycl_implementation_type", "sycl_kernel_invocation_type", "kokkos_execution_space" }); + plssvm::bindings::python::util::check_kwargs_for_correctness(optional_args, { "foo", "sycl_implementation_type", "sycl_data_parallel_kernel", "kokkos_execution_space" }); if (backend == plssvm::backend_type::sycl) { // parse SYCL specific keyword arguments @@ -54,13 +54,14 @@ template if (optional_args.contains("sycl_implementation_type")) { impl_type = optional_args["sycl_implementation_type"].cast(); } - plssvm::sycl::kernel_invocation_type invocation_type = plssvm::sycl::kernel_invocation_type::automatic; - if (optional_args.contains("sycl_kernel_invocation_type")) { - invocation_type = optional_args["sycl_kernel_invocation_type"].cast(); + plssvm::sycl::data_parallel_kernel data_parallel_kernel_type = plssvm::sycl::data_parallel_kernel::automatic; + if (optional_args.contains("sycl_data_parallel_kernel")) { + data_parallel_kernel_type = optional_args["sycl_data_parallel_kernel"].cast(); } - return plssvm::make_csvm(backend, std::move(comm), target, params, plssvm::sycl_implementation_type 
= impl_type, plssvm::sycl_kernel_invocation_type = invocation_type); - } else if (backend == plssvm::backend_type::kokkos) { + return plssvm::make_csvm(backend, std::move(comm), target, params, plssvm::sycl_implementation_type = impl_type, plssvm::sycl_data_parallel_kernel = data_parallel_kernel_type); + } + if (backend == plssvm::backend_type::kokkos) { // parse Kokkos specific keyword arguments plssvm::kokkos::execution_space space = plssvm::kokkos::execution_space::automatic; if (optional_args.contains("kokkos_execution_space")) { @@ -68,9 +69,8 @@ template } return plssvm::make_csvm(backend, std::move(comm), target, params, plssvm::kokkos_execution_space = space); - } else { - return plssvm::make_csvm(backend, std::move(comm), target, params); } + return plssvm::make_csvm(backend, std::move(comm), target, params); } } // namespace plssvm::bindings::python::util diff --git a/bindings/Python/svm_types.cpp b/bindings/Python/svm_types.cpp index 793354c12..401113927 100644 --- a/bindings/Python/svm_types.cpp +++ b/bindings/Python/svm_types.cpp @@ -8,15 +8,17 @@ #include "plssvm/svm_types.hpp" // plssvm::svm_type, plssvm::list_available_svm_types, plssvm::svm_type_from_model_file -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion +#include "pybind11/cast.h" // py::arg #include "pybind11/pybind11.h" // py::module_, py::enum_ namespace py = pybind11; void init_svm_types(py::module_ &m) { // bind enum class - py::enum_ py_enum(m, "SVMType", "Enum class for all implemented SVM types in PLSSVM."); + py::enum_ py_enum(m, "SVMType", "enum.Enum", "Enum class for all implemented SVM types in PLSSVM."); py_enum .value("CSVC", plssvm::svm_type::csvc, "use a C-SVC for classification") 
.value("CSVR", plssvm::svm_type::csvr, "use a C-SVR for classification"); @@ -26,6 +28,6 @@ void init_svm_types(py::module_ &m) { // bind free functions m.def("list_available_svm_types", &plssvm::list_available_svm_types, "list the available SVM types"); - m.def("svm_type_to_task_name", &plssvm::svm_type_to_task_name, "get the task name (e.g., \"classification\" or \"regression\") based on the provided SVMType", py::arg("svm_type")); + m.def("svm_type_to_task_name", &plssvm::svm_type_to_task_name, R"(get the task name (e.g., "classification" or "regression") based on the provided SVMType)", py::arg("svm_type")); m.def("svm_type_from_model_file", &plssvm::svm_type_from_model_file, "determine the SVMType based on the provided LIBSVM model file", py::arg("filename")); } diff --git a/bindings/Python/target_platforms.cpp b/bindings/Python/target_platforms.cpp index e47be725b..530a30385 100644 --- a/bindings/Python/target_platforms.cpp +++ b/bindings/Python/target_platforms.cpp @@ -8,16 +8,17 @@ #include "plssvm/target_platforms.hpp" // plssvm::target_platform, plssvm::list_available_target_platforms, plssvm::determine_default_target_platform -#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion +#include "bindings/Python/bindings_fwd.hpp" // forward declare all helper functions to create the Python bindings +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_implicit_str_enum_conversion #include "pybind11/pybind11.h" // py::module_, py::enum_ -#include "pybind11/stl.h" // support for STL types: std::vector +#include "pybind11/stl.h" // NOLINT: support for STL types: std::vector namespace py = pybind11; void init_target_platforms(py::module_ &m) { // bind enum class - py::enum_ py_enum(m, "TargetPlatform", "Enum class for all possible targets that PLSSVM supports."); + py::enum_ py_enum(m, "TargetPlatform", "enum.Enum", "Enum class for all possible targets that PLSSVM supports."); 
py_enum .value("AUTOMATIC", plssvm::target_platform::automatic, "the default target with respect to the used backend type; checks for available devices in the following order: NVIDIA GPUs -> AMD GPUs -> Intel GPUs -> CPUs") .value("CPU", plssvm::target_platform::cpu, "target CPUs only (Intel, AMD, IBM, ...)") diff --git a/bindings/Python/type_caster/label_vector_wrapper_caster.hpp b/bindings/Python/type_caster/label_vector_wrapper_type_caster.hpp similarity index 92% rename from bindings/Python/type_caster/label_vector_wrapper_caster.hpp rename to bindings/Python/type_caster/label_vector_wrapper_type_caster.hpp index becd60845..fb18dc20f 100644 --- a/bindings/Python/type_caster/label_vector_wrapper_caster.hpp +++ b/bindings/Python/type_caster/label_vector_wrapper_type_caster.hpp @@ -72,11 +72,10 @@ template if (arr.size() == 0) { // return an empty vector return std::vector{}; - } else { - // convert py::array to std::vector - auto arr_t = arr.cast>(); - return std::vector(arr_t.data(0), arr_t.data(0) + arr_t.shape(0)); } + // convert py::array to std::vector + auto arr_t = arr.cast>(); + return std::vector(arr_t.data(0), arr_t.data(0) + arr_t.shape(0)); } /** @@ -98,12 +97,12 @@ template template [[nodiscard]] possible_vector_types generic_pyarray_to_vector(const py::array &arr) { // sanity check the passed py::array - if (!(arr.flags() & py::array::c_style)) { + if (!(arr.flags() & py::array::c_style)) { // NOLINT(hicpp-signed-bitwise): Pybind11 way to do this throw py::value_error{ "The py::array must be C-contiguous" }; } // the type used in the py::array - py::dtype type = arr.dtype(); + const py::dtype type = arr.dtype(); PLSSVM_CREATE_PYARRAY_TO_VECTOR_MAPPINGS(bool) PLSSVM_CREATE_PYARRAY_TO_VECTOR_MAPPINGS(std::int8_t) @@ -127,14 +126,13 @@ template if (arr.size() == 0) { // return an empty vector return std::vector{}; - } else { - std::vector result; - result.reserve(arr.shape(0)); - for (py::handle item : arr) { - result.push_back(py::cast(item)); - } - 
return result; } + std::vector result; + result.reserve(arr.shape(0)); + for (const py::handle &item : arr) { + result.push_back(py::cast(item)); + } + return result; } } @@ -205,9 +203,9 @@ template py::type highest_type{ py::module_::import("builtins").attr("bool") }; int highest_precedence{ -1 }; for (std::size_t i = 0; i < py::len(list); ++i) { - py::object item = list[i]; - py::type type = py::type::of(item); - int precedence = precedence_map.at(type); + const py::object item = list[i]; + const py::type type = py::type::of(item); + const int precedence = precedence_map.at(type); if (precedence > highest_precedence) { highest_precedence = precedence; highest_type = type; @@ -263,7 +261,7 @@ struct label_vector_wrapper { /// The labels. PossibleTypes labels{}; /// The actually used Python dtype. - py::dtype dtype{}; + py::dtype dtype; }; } // namespace plssvm::bindings::python::util @@ -286,9 +284,11 @@ struct type_caster(obj)) { // provided obj is a Python list auto [labels, dtype] = plssvm::bindings::python::util::generic_pylist_to_vector(py::cast(obj)); @@ -317,7 +318,7 @@ struct type_caster(); arr = arr.reshape({ arr.size() }); } else { - throw py::value_error{ fmt::format("Unsupported data type: {}", std::string{ py::str(obj.get_type().attr("__name__")) }) }; + throw py::value_error{ fmt::format("Unsupported data type: {}", std::string{ py::str(py::type::of(obj).attr("__name__")) }) }; } // sanity check the number of elements in the numpy array diff --git a/bindings/Python/type_caster/matrix_type_caster.hpp b/bindings/Python/type_caster/matrix_type_caster.hpp index 0231beb76..852182384 100644 --- a/bindings/Python/type_caster/matrix_type_caster.hpp +++ b/bindings/Python/type_caster/matrix_type_caster.hpp @@ -55,9 +55,11 @@ struct type_caster> { * @details If the PLSSVM matrix's memory layout is AoS, uses a Numpy ndarray with c_style layout, * if the PLSSVM matrix's memory layout is SoA, uses a Numpy ndarray with f_style layout. 
* @param[in] matr the PLSSVM matrix to convert to a Numpy ndarray + * @params[in] rvp *unused* + * @params[in] h *unused* * @return a Pybind11 handle to the Numpy ndarray */ - static py::handle cast(const matrix_type &matr, py::return_value_policy, py::handle) { + static py::handle cast(const matrix_type &matr, [[maybe_unused]] const py::return_value_policy rvp, [[maybe_unused]] const py::handle h) { const std::size_t num_data_points = matr.num_rows(); const std::size_t num_features = matr.num_cols(); @@ -66,7 +68,7 @@ struct type_caster> { // create the Python numpy array py_array_type arr({ num_data_points, num_features }); - py::buffer_info buffer = arr.request(); + const py::buffer_info buffer = arr.request(); T *ptr = static_cast(buffer.ptr); // check if the provided matrix has padding entries -> must be removed @@ -103,14 +105,14 @@ struct type_caster> { const std::size_t num_cols = arr.shape(1); // get the underlying raw memory - py::buffer_info buffer = arr.request(); + const py::buffer_info buffer = arr.request(); const T *ptr = static_cast(buffer.ptr); // note: the conversions use OpenMP -> remove Python's Global Interpreter Lock const py::gil_scoped_release release; // check the memory layout of the Python Numpy array - if constexpr (static_cast(Flags & py::array::c_style)) { + if constexpr (static_cast(Flags & py::array::c_style)) { // NOLINT(hicpp-signed-bitwise): Pybind11 way to do this // the provided Python Numpy array has C style layout if constexpr (layout == plssvm::layout_type::aos) { // memory layout of Python Numpy array and PLSSVM matrix are the same -> can use memcpy to convert @@ -130,7 +132,7 @@ struct type_caster> { // unsupported PLSSVM matrix memory layout return false; } - } else if constexpr (static_cast(Flags & py::array::f_style)) { + } else if constexpr (static_cast(Flags & py::array::f_style)) { // NOLINT(hicpp-signed-bitwise): Pybind11 way to do this if constexpr (layout == plssvm::layout_type::aos) { // the memory layouts don't 
match -> must use loops to convert layouts #pragma omp parallel for collapse(2) @@ -161,13 +163,14 @@ struct type_caster> { * @brief Try converting a Python object @p obj to a plssvm::matrix. * @detauls Honors different Numpy ndarray memory layouts (c_style or f_style) and PLSSVM matrix layout types. * @param[in] obj the object to convert + * @params[in] allow_implicit_conversion *unused* * @return `true` if the conversion was successful, `false` otherwise * @throws py::value_error if the provided Python list is empty (or one-dimensional) * @throws py::value_error if the provided 2D Python list has inhomogeneous shape * @throws py::value_error if @p obj is not a Numpy ndarray, Pandas DataFrame, SciPy sparse matrix, or Python 2D list * @throws py::value_error if the Numpy ndarray doesn't have a two-dimensional shape */ - bool load(py::handle obj, bool) { + bool load(py::handle obj, [[maybe_unused]] const bool allow_implicit_conversion) { // special case py::list if (py::isinstance(obj)) { // provided obj is a Python list -> check if it is a correct py::list of py::list @@ -215,7 +218,7 @@ struct type_caster> { // provided obj is a SciPy sparse matrix arr = obj.attr("toarray")().cast(); } else { - throw py::value_error{ fmt::format("Unsupported data type: {}", std::string{ py::str(obj.get_type().attr("__name__")) }) }; + throw py::value_error{ fmt::format("Unsupported data type: {}", std::string{ py::str(py::type::of(obj).attr("__name__")) }) }; } // sanity check the number of elements in the numpy array @@ -237,27 +240,27 @@ struct type_caster> { value = matrix_type{ plssvm::shape{ num_rows, num_cols }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } }; // get the underlying buffer - py::buffer_info buffer = arr.request(); + const py::buffer_info buffer = arr.request(); // check the memory layout of the Python Numpy array if (plssvm::bindings::python::util::is_c_contiguous(buffer)) { // array is already c_style -> no need to force cast return 
copy_pyarray_to_matrix(arr.cast>()); - } else if (plssvm::bindings::python::util::is_f_contiguous(buffer)) { + } + if (plssvm::bindings::python::util::is_f_contiguous(buffer)) { // array is already f_style -> no need to force cast return copy_pyarray_to_matrix(arr.cast>()); - } else { - // array is non-contiguous - if constexpr (layout == plssvm::layout_type::aos) { - // if we want to get a PLSSVM matrix in AoS layout, force casting to c_style is more performant - return copy_pyarray_to_matrix(arr.cast>()); - } else if constexpr (layout == plssvm::layout_type::soa) { - // if we want to get a PLSSVM matrix in SoA layout, force casting to f_style is more performant - return copy_pyarray_to_matrix(arr.cast>()); - } else { - return false; - } } + // array is non-contiguous + if constexpr (layout == plssvm::layout_type::aos) { + // if we want to get a PLSSVM matrix in AoS layout, force casting to c_style is more performant + return copy_pyarray_to_matrix(arr.cast>()); + } + if constexpr (layout == plssvm::layout_type::soa) { + // if we want to get a PLSSVM matrix in SoA layout, force casting to f_style is more performant + return copy_pyarray_to_matrix(arr.cast>()); + } + return false; } return true; diff --git a/bindings/Python/type_caster/matrix_wrapper_type_caster.hpp b/bindings/Python/type_caster/matrix_wrapper_type_caster.hpp index de1105077..a22e2457d 100644 --- a/bindings/Python/type_caster/matrix_wrapper_type_caster.hpp +++ b/bindings/Python/type_caster/matrix_wrapper_type_caster.hpp @@ -41,7 +41,7 @@ struct matrix_wrapper { /// The PLSSVM matrix. plssvm::matrix matrix{}; /// The optionally available feature names. - std::optional> feature_names{}; + std::optional> feature_names; }; /** @@ -77,9 +77,11 @@ struct type_caster> { /** * @brief Convert a matrix_wrapper to a Numpy ndarray. Simply calls the custom type caster for a plssvm::matrix. 
* @param[in] matr the PLSSVM matrix to convert to a Numpy ndarray + * @params[in] rvp *unused* + * @params[in] h *unused* * @return a Pybind11 handle to the Numpy ndarray */ - static py::handle cast(const matrix_type &matr, py::return_value_policy, py::handle) { + static py::handle cast(const matrix_type &matr, [[maybe_unused]] const py::return_value_policy rvp, [[maybe_unused]] const py::handle h) { return py::cast(matr.matrix); } @@ -87,11 +89,12 @@ struct type_caster> { * @brief Try converting a Python object @p obj to a matrix_wrapper. * @detauls Calls the custom type caster for a plssvm::matrix and, additionally, tries to gather the feature names. * @param[in] obj the object to convert + * @params[in] allow_implicit_conversions *unused* * @return `true` if the conversion was successful, `false` otherwise * @throws py::value_error all exceptions from the custom plssvm::matrix type caster * @throws py::value_error if not all column names are strings */ - bool load(py::handle obj, bool) { + bool load(py::handle obj, [[maybe_unused]] const bool allow_implicit_conversions) { // convert the object to a plssvm::matrix value.matrix = obj.cast>(); @@ -101,7 +104,7 @@ struct type_caster> { const auto &list = obj.attr("columns").cast(); std::vector column_names{}; column_names.reserve(list.size()); - for (py::handle item : list) { + for (const py::handle &item : list) { // note: column names are only set if they are ALL strings if (!py::isinstance(item)) { throw py::type_error{ diff --git a/bindings/Python/type_caster/mpi_type_caster.hpp b/bindings/Python/type_caster/mpi_type_caster.hpp index 35ff98ec9..b7997a51d 100644 --- a/bindings/Python/type_caster/mpi_type_caster.hpp +++ b/bindings/Python/type_caster/mpi_type_caster.hpp @@ -39,9 +39,11 @@ struct type_caster { /** * @brief Convert a plssvm::mpi::communicator to a mpi4py communicator. 
* @param[in] comm the PLSSVM MPI communicator wrapper + * @params[in] rvp *unused* + * @params[in] h *unused* * @return a Pybind11 handle to the mpi4py communicator */ - static py::handle cast([[maybe_unused]] const plssvm::mpi::communicator &comm, py::return_value_policy, py::handle) { + static py::handle cast([[maybe_unused]] const plssvm::mpi::communicator &comm, [[maybe_unused]] const py::return_value_policy rvp, [[maybe_unused]] const py::handle h) { #if defined(PLSSVM_HAS_MPI_ENABLED) // we have MPI enabled try { @@ -61,10 +63,11 @@ struct type_caster { /** * @brief Try converting a Python object @p obj to a plssvm::mpi::communicator. * @param[in] obj the object to convert + * @params[in] allow_implicit_conversions *unused* * @return `true` if the conversion was successful, `false` otherwise * @throws py::value_error if PLSSVM was built without MPI support, but a communicator was explicitly provided in Python */ - bool load([[maybe_unused]] py::handle obj, bool) { + bool load([[maybe_unused]] py::handle obj, [[maybe_unused]] const bool allow_implicit_conversion) { #if defined(PLSSVM_HAS_MPI_ENABLED) try { // check if we can find mpi4py @@ -75,31 +78,28 @@ struct type_caster { const MPI_Fint f_handle = obj.attr("py2f")().cast(); value = plssvm::mpi::communicator{ MPI_Comm_f2c(f_handle) }; return true; - } else { - // something else was provided -> abort type casting - return false; } + // something else was provided -> abort type casting + return false; } catch (const py::error_already_set &) { // we couldn't find mpi4py if (obj.is_none()) { // but "comm" wasn't set -> we can use our default plssvm::mpi::communicator value = plssvm::mpi::communicator{}; return true; - } else { - // something was provided -> abort type casting - return false; } + // something was provided -> abort type casting + return false; } #else // we haven't MPI enabled -> check whether the "comm" argument has been provided if (!obj.is_none()) { // "comm" has been provided -> we can't use 
it -> throw an exception throw py::value_error{ "ERROR: an MPI communicator was explicitly provided, but PLSSVM was built without support for MPI!" }; - } else { - // "comm" was not provided -> use a default constructed plssvm::mpi::communicator that essentially does nothing - value = plssvm::mpi::communicator{}; - return true; } + // "comm" was not provided -> use a default constructed plssvm::mpi::communicator that essentially does nothing + value = plssvm::mpi::communicator{}; + return true; #endif } }; diff --git a/bindings/Python/utility.hpp b/bindings/Python/utility.hpp index f795b4d2a..b9e26c574 100644 --- a/bindings/Python/utility.hpp +++ b/bindings/Python/utility.hpp @@ -21,13 +21,12 @@ #include "fmt/format.h" // fmt::format #include "pybind11/numpy.h" // py::array, py::array_t, py::buffer_info, py::array::c_style -#include "pybind11/pybind11.h" // py::kwargs, py::value_error, py::isinstance, py::str, py::module_, py::register_exception_translator, py::set_error, py::object, py::len, py::enum_, py::implicitly_convertible +#include "pybind11/pybind11.h" // py::kwargs, py::value_error, py::isinstance, py::str, py::module_, py::enum_, py::register_exception_translator, py::set_error, py::object, py::len, py::exception #include "pybind11/pytypes.h" // py::type, py::ssize_t #include // fixed-width integers #include // std::memcpy #include // std::exception_ptr, std::rethrow_exception -#include // std::istringstream #include // std::string #include // std::string_view #include // std::is_same_v, std::false_type @@ -112,8 +111,8 @@ inline void check_kwargs_for_correctness(const py::kwargs &args, const std::vect */ template void register_py_exception(py::module_ &m, const std::string &py_exception_name, BaseException &base_exception) { - static py::exception py_exception(m, py_exception_name.c_str(), base_exception.ptr()); - py::register_exception_translator([](std::exception_ptr p) { + static const py::exception py_exception(m, py_exception_name.c_str(), 
base_exception.ptr()); + py::register_exception_translator([](std::exception_ptr p) { // NOLINT(performance-unnecessary-value-param): const & does not compile try { if (p) { std::rethrow_exception(p); @@ -196,7 +195,7 @@ PLSSVM_CREATE_PYTHON_TYPE_NAME_MAPPING(std::string, "str") * @return the constructed @p Instance wrapped in a std::variant of type @p PossibleTypes (`[[nodiscard]]`) */ template