diff --git a/.figures/classification_example.png b/.figures/classification_example.png new file mode 100644 index 000000000..4343b7b88 Binary files /dev/null and b/.figures/classification_example.png differ diff --git a/.figures/plssvm_bokeh.gif b/.figures/plssvm_bokeh.gif new file mode 100644 index 000000000..8cad0eaf4 Binary files /dev/null and b/.figures/plssvm_bokeh.gif differ diff --git a/.figures/regression_example.png b/.figures/regression_example.png new file mode 100644 index 000000000..b6cab4ab1 Binary files /dev/null and b/.figures/regression_example.png differ diff --git a/.figures/sklearn_examples/classifier_comparison.png b/.figures/sklearn_examples/classifier_comparison.png new file mode 100644 index 000000000..9fcdacc85 Binary files /dev/null and b/.figures/sklearn_examples/classifier_comparison.png differ diff --git a/.figures/sklearn_examples/decision_boundaries_via_coef_and_intercept.png b/.figures/sklearn_examples/decision_boundaries_via_coef_and_intercept.png new file mode 100644 index 000000000..b3e0d2f2d Binary files /dev/null and b/.figures/sklearn_examples/decision_boundaries_via_coef_and_intercept.png differ diff --git a/.figures/sklearn_examples/decision_boundary_confidence.png b/.figures/sklearn_examples/decision_boundary_confidence.png new file mode 100644 index 000000000..93eb03a6a Binary files /dev/null and b/.figures/sklearn_examples/decision_boundary_confidence.png differ diff --git a/.figures/sklearn_examples/different_classifiers.png b/.figures/sklearn_examples/different_classifiers.png new file mode 100644 index 000000000..e47aea6e4 Binary files /dev/null and b/.figures/sklearn_examples/different_classifiers.png differ diff --git a/.figures/sklearn_examples/digit_classification_1.png b/.figures/sklearn_examples/digit_classification_1.png new file mode 100644 index 000000000..f291df815 Binary files /dev/null and b/.figures/sklearn_examples/digit_classification_1.png differ diff --git 
a/.figures/sklearn_examples/digit_classification_2.png b/.figures/sklearn_examples/digit_classification_2.png new file mode 100644 index 000000000..32e78979e Binary files /dev/null and b/.figures/sklearn_examples/digit_classification_2.png differ diff --git a/.figures/sklearn_examples/digit_classification_confusion_matrix.png b/.figures/sklearn_examples/digit_classification_confusion_matrix.png new file mode 100644 index 000000000..d34e72afc Binary files /dev/null and b/.figures/sklearn_examples/digit_classification_confusion_matrix.png differ diff --git a/.figures/sklearn_examples/face_recognition.png b/.figures/sklearn_examples/face_recognition.png new file mode 100644 index 000000000..3e61c9155 Binary files /dev/null and b/.figures/sklearn_examples/face_recognition.png differ diff --git a/.figures/sklearn_examples/face_recognition_confusion_matrix.png b/.figures/sklearn_examples/face_recognition_confusion_matrix.png new file mode 100644 index 000000000..3bcb33fc8 Binary files /dev/null and b/.figures/sklearn_examples/face_recognition_confusion_matrix.png differ diff --git a/.figures/sklearn_examples/face_recognition_eigenfaces.png b/.figures/sklearn_examples/face_recognition_eigenfaces.png new file mode 100644 index 000000000..2553e7f97 Binary files /dev/null and b/.figures/sklearn_examples/face_recognition_eigenfaces.png differ diff --git a/.figures/sklearn_examples/feature_discretization.png b/.figures/sklearn_examples/feature_discretization.png new file mode 100644 index 000000000..79e0d59b4 Binary files /dev/null and b/.figures/sklearn_examples/feature_discretization.png differ diff --git a/.figures/sklearn_examples/rbf_parameters.png b/.figures/sklearn_examples/rbf_parameters.png new file mode 100644 index 000000000..6acae61a1 Binary files /dev/null and b/.figures/sklearn_examples/rbf_parameters.png differ diff --git a/.figures/sklearn_examples/rbf_parameters_3_classes.png b/.figures/sklearn_examples/rbf_parameters_3_classes.png new file mode 100644 index 
000000000..e946f6335 Binary files /dev/null and b/.figures/sklearn_examples/rbf_parameters_3_classes.png differ diff --git a/.figures/sklearn_examples/rbf_parameters_accuracy.png b/.figures/sklearn_examples/rbf_parameters_accuracy.png new file mode 100644 index 000000000..da5add793 Binary files /dev/null and b/.figures/sklearn_examples/rbf_parameters_accuracy.png differ diff --git a/.figures/sklearn_examples/rbf_parameters_accuracy_3_classes.png b/.figures/sklearn_examples/rbf_parameters_accuracy_3_classes.png new file mode 100644 index 000000000..5b0610e2e Binary files /dev/null and b/.figures/sklearn_examples/rbf_parameters_accuracy_3_classes.png differ diff --git a/.figures/sklearn_examples/real_world/california_housing.png b/.figures/sklearn_examples/real_world/california_housing.png new file mode 100644 index 000000000..c009e1bed Binary files /dev/null and b/.figures/sklearn_examples/real_world/california_housing.png differ diff --git a/.figures/sklearn_examples/real_world/fashion_mnist.png b/.figures/sklearn_examples/real_world/fashion_mnist.png new file mode 100644 index 000000000..1f308444e Binary files /dev/null and b/.figures/sklearn_examples/real_world/fashion_mnist.png differ diff --git a/.figures/sklearn_examples/real_world/fashion_mnist_confusion_matrix.png b/.figures/sklearn_examples/real_world/fashion_mnist_confusion_matrix.png new file mode 100644 index 000000000..ee6ef26db Binary files /dev/null and b/.figures/sklearn_examples/real_world/fashion_mnist_confusion_matrix.png differ diff --git a/.figures/sklearn_examples/real_world/svhn.png b/.figures/sklearn_examples/real_world/svhn.png new file mode 100644 index 000000000..61bc78ecf Binary files /dev/null and b/.figures/sklearn_examples/real_world/svhn.png differ diff --git a/.figures/sklearn_examples/real_world/svhn_confusion_matrix.png b/.figures/sklearn_examples/real_world/svhn_confusion_matrix.png new file mode 100644 index 000000000..e7506cee2 Binary files /dev/null and 
b/.figures/sklearn_examples/real_world/svhn_confusion_matrix.png differ diff --git a/.figures/sklearn_examples/separating_hyperplane.png b/.figures/sklearn_examples/separating_hyperplane.png new file mode 100644 index 000000000..53b7a350f Binary files /dev/null and b/.figures/sklearn_examples/separating_hyperplane.png differ diff --git a/.figures/sklearn_examples/svm_anova.png b/.figures/sklearn_examples/svm_anova.png new file mode 100644 index 000000000..3e0fe41c5 Binary files /dev/null and b/.figures/sklearn_examples/svm_anova.png differ diff --git a/.figures/sklearn_examples/svm_kernels_data.png b/.figures/sklearn_examples/svm_kernels_data.png new file mode 100644 index 000000000..e7d776358 Binary files /dev/null and b/.figures/sklearn_examples/svm_kernels_data.png differ diff --git a/.figures/sklearn_examples/svm_kernels_laplacian.png b/.figures/sklearn_examples/svm_kernels_laplacian.png new file mode 100644 index 000000000..cfd64a135 Binary files /dev/null and b/.figures/sklearn_examples/svm_kernels_laplacian.png differ diff --git a/.figures/sklearn_examples/svm_kernels_linear.png b/.figures/sklearn_examples/svm_kernels_linear.png new file mode 100644 index 000000000..9cc493483 Binary files /dev/null and b/.figures/sklearn_examples/svm_kernels_linear.png differ diff --git a/.figures/sklearn_examples/svm_kernels_poly.png b/.figures/sklearn_examples/svm_kernels_poly.png new file mode 100644 index 000000000..7a3486bb9 Binary files /dev/null and b/.figures/sklearn_examples/svm_kernels_poly.png differ diff --git a/.figures/sklearn_examples/svm_kernels_rbf.png b/.figures/sklearn_examples/svm_kernels_rbf.png new file mode 100644 index 000000000..b6201c766 Binary files /dev/null and b/.figures/sklearn_examples/svm_kernels_rbf.png differ diff --git a/.figures/sklearn_examples/svm_kernels_sigmoid.png b/.figures/sklearn_examples/svm_kernels_sigmoid.png new file mode 100644 index 000000000..ce15ef6a8 Binary files /dev/null and 
b/.figures/sklearn_examples/svm_kernels_sigmoid.png differ diff --git a/.figures/sklearn_examples/svm_kernels_xor.png b/.figures/sklearn_examples/svm_kernels_xor.png new file mode 100644 index 000000000..3fa8b491e Binary files /dev/null and b/.figures/sklearn_examples/svm_kernels_xor.png differ diff --git a/.figures/sklearn_examples/svm_margin.png b/.figures/sklearn_examples/svm_margin.png new file mode 100644 index 000000000..754b6df6c Binary files /dev/null and b/.figures/sklearn_examples/svm_margin.png differ diff --git a/.figures/sklearn_examples/svm_regression.png b/.figures/sklearn_examples/svm_regression.png new file mode 100644 index 000000000..aa0dbfc3b Binary files /dev/null and b/.figures/sklearn_examples/svm_regression.png differ diff --git a/.github/workflows/clang_gcc_linux.yml b/.github/workflows/clang_gcc_linux.yml new file mode 100644 index 000000000..1fa8867a8 --- /dev/null +++ b/.github/workflows/clang_gcc_linux.yml @@ -0,0 +1,46 @@ +name: Linux (Ubuntu) CPU +on: + push: + branches: + - main + pull_request: + workflow_dispatch: +jobs: + Linux-Test: + runs-on: ubuntu-latest + strategy: + matrix: + build_type: [Debug, Release] + compiler: [g++, clang++] + steps: + - name: "Update system" + run: | + sudo apt update + sudo apt upgrade + sudo apt autoremove + - name: "Install Compiler" + run: | + sudo apt install g++ clang libomp-dev + - name: "Install cmake 3.31.0" + uses: lukka/get-cmake@v3.31.0 + - name: "Clone the PLSSVM repository into PLSSVM/" + uses: actions/checkout@v4.1.1 + with: + path: PLSSVM + - name: "Install Python dependencies" + run: | + pip install --upgrade pip + pip install -r PLSSVM/install/python_requirements.txt + - name: "Configure PLSSVM using CMake" + run: | + cd PLSSVM + cmake --preset openmp_test -DCMAKE_CXX_COMPILER=${{ matrix.compiler }} -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DPLSSVM_TARGET_PLATFORMS="cpu" -DPLSSVM_ENABLE_LANGUAGE_BINDINGS=ON -DPLSSVM_ENABLE_PERFORMANCE_TRACKING=ON 
-DPLSSVM_TEST_FILE_NUM_DATA_POINTS=50 -DPLSSVM_TEST_FILE_NUM_FEATURES=20 -DPLSSVM_ENABLE_LTO=OFF + - name: "Build PLSSVM" + run: | + cd PLSSVM + cmake --build --preset openmp_test --config ${{ matrix.build_type }} + echo "${GITHUB_WORKSPACE}/PLSSVM/build" >> $GITHUB_PATH + - name: "Run tests" + run: | + cd PLSSVM + ctest --preset openmp_test -C ${{ matrix.build_type }} --parallel 2 \ No newline at end of file diff --git a/.github/workflows/clang_macos.yml b/.github/workflows/clang_macos.yml index f83f0caa5..052bdd2a7 100644 --- a/.github/workflows/clang_macos.yml +++ b/.github/workflows/clang_macos.yml @@ -1,7 +1,10 @@ name: macOS CPU on: - workflow_dispatch: push: + branches: + - main + pull_request: + workflow_dispatch: jobs: macOS-Test: runs-on: macos-13 @@ -22,7 +25,12 @@ jobs: brew install libomp - name: "Install Python dependencies" run: | - python3 -m pip install argparse scikit-learn humanize --break-system-packages + python3 -m pip install -U pip + pip install argparse scikit-learn humanize --break-system-packages + - name: "Set PYTHONPATH" + run: | + export PYTHONPATH=$(python3 -c "import site; print(site.getsitepackages()[0])") + echo "PYTHONPATH=$PYTHONPATH" >> $GITHUB_ENV - name: "Clone the PLSSVM repository into PLSSVM/" uses: actions/checkout@v4.1.1 with: @@ -32,7 +40,7 @@ jobs: cd PLSSVM export LDFLAGS="-L/opt/homebrew/opt/libomp/lib" export CPPFLAGS="-I/opt/homebrew/opt/libomp/include" - cmake --preset openmp_test -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DPLSSVM_TARGET_PLATFORMS="cpu" -DPLSSVM_ENABLE_LANGUAGE_BINDINGS=ON -DPLSSVM_ENABLE_PERFORMANCE_TRACKING=ON -DPLSSVM_GENERATE_TEST_FILE=OFF -DPLSSVM_ENABLE_LTO=OFF + cmake --preset openmp_test -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DPLSSVM_TARGET_PLATFORMS="cpu" -DPLSSVM_ENABLE_LANGUAGE_BINDINGS=ON -DPLSSVM_ENABLE_PERFORMANCE_TRACKING=ON -DPLSSVM_TEST_FILE_NUM_DATA_POINTS=50 -DPLSSVM_TEST_FILE_NUM_FEATURES=20 
-DPLSSVM_ENABLE_LTO=OFF - name: "Build PLSSVM" shell: bash run: | diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index 20552191d..8632723be 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -31,6 +31,7 @@ jobs: cmake --preset all -DPLSSVM_TARGET_PLATFORMS="cpu" -DPLSSVM_ENABLE_FORMATTING=ON # check source file formatting - name: "Check source file formatting via clang-format" + if: always() run: | set +e cd PLSSVM @@ -53,6 +54,7 @@ jobs: if-no-files-found: ignore # check CMake formatting - name: "Check CMake formatting via cmake-format" + if: always() run: | set +e cd PLSSVM diff --git a/.github/workflows/msvc_windows.yml b/.github/workflows/msvc_windows.yml index d31b5df7f..f833ad83a 100644 --- a/.github/workflows/msvc_windows.yml +++ b/.github/workflows/msvc_windows.yml @@ -1,7 +1,10 @@ name: Windows MSVC CPU on: - workflow_dispatch: push: + branches: + - main + pull_request: + workflow_dispatch: jobs: MSVC-Test: runs-on: windows-latest diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml new file mode 100644 index 000000000..cf8e9d991 --- /dev/null +++ b/.github/workflows/pip.yml @@ -0,0 +1,66 @@ +name: pip install +on: + push: + branches: + - main + pull_request: + workflow_dispatch: +jobs: + pip-Test: + runs-on: ubuntu-latest + steps: + - name: "Update system" + run: | + sudo apt update + sudo apt upgrade + sudo apt autoremove + - name: "Install new g++" + run: | + sudo apt install g++ + - name: "Clone the PLSSVM repository into PLSSVM/" + uses: actions/checkout@v4.1.1 + with: + path: PLSSVM + - name: "Set up Python" + uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: "Create and activate virtual environment" + run: | + python -m venv venv + source venv/bin/activate + echo "VIRTUAL_ENV=$VIRTUAL_ENV" >> $GITHUB_ENV + - name: "Install Python dependencies" + run: | + source $VIRTUAL_ENV/bin/activate + pip install --upgrade pip + pip install -r 
PLSSVM/install/python_requirements.txt + - name: "Build PLSSVM locally via pip" + run: | + source $VIRTUAL_ENV/bin/activate + cd PLSSVM + pip install . + - name: "Run examples" + env: + MPLBACKEND: Agg + run: | + source $VIRTUAL_ENV/bin/activate + set -e + cd PLSSVM/examples/python + python sklearn_like_svc.py + python sklearn_like_svr.py + cd sklearn + python plot_classifier_comparison.py + python plot_decision_boundaries_via_coef_and_intercept.py + python plot_decision_boundary_confidence.py + python plot_different_classifiers.py + python plot_digits_classification.py + python plot_face_recognition.py + python plot_feature_discretization.py + python plot_rbf_parameters.py + python plot_rbf_parameters_3_classes.py + python plot_separating_hyperplane.py + python plot_svm_anova.py + python plot_svm_kernels.py + python plot_svm_margin.py + python plot_svm_regression.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 84e730eec..7ef583bd8 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -93,20 +93,22 @@ set(PLSSVM_BASE_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/detail/string_utility.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/detail/utility.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/exceptions/exceptions.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/svm/csvm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/version/version.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/version/git_metadata/git_metadata.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/backend_types.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/classification_report.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/classification_types.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/csvm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/file_format_types.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/gamma.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/kernel_function_types.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/matrix.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/parameter.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/regression_report.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/shape.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/solver_types.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/target_platforms.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/svm_types.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/verbosity_levels.cpp ) @@ -166,7 +168,9 @@ target_compile_options( /wd4005 /wd4702 /wd4849 - /wd4127> + /wd4127 + /wd4250 # disable multiple-inheritance dominance warning + /wd4589> # disable constructor inheritance warning # /wd4849: ignore "OpenMP 'reduction' clause ignored in 'simd' directive" -> no SIMD clause currently effective in MSVC /wd4127: ignore "conditional # expression is constant" from {fmt} ranges.h header ) @@ -211,11 +215,11 @@ endif () ######################################################################################################################## # check for OpenMP (not for the backend!) # ######################################################################################################################## -find_package(OpenMP 4.0 QUIET) +find_package(OpenMP QUIET) if (OpenMP_FOUND) message(STATUS "Found OpenMP ${OpenMP_CXX_VERSION} to speed-up library utilities (like file parsing).") set(PLSSVM_FOUND_OPENMP_FOR_UTILITY ON) - if (${CMAKE_CXX_COMPILER_ID} MATCHES "MSVC") + if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") target_compile_options(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC -openmp:llvm -openmp:experimental) else () target_link_libraries(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC OpenMP::OpenMP_CXX) @@ -326,17 +330,6 @@ else () ) endif () - include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/check_python_libs.cmake) - set(PLSSVM_TARGET_PLATFORMS_PYTHON_SCRIPT_REQUIRED_LIBS cpuinfo GPUtil pyamdgpuinfo pylspci) - message( - STATUS - "Checking required Python3 libraries (${PLSSVM_TARGET_PLATFORMS_PYTHON_SCRIPT_REQUIRED_LIBS}) to automatically determine the PLSSVM_TARGET_PLATFORMS." 
- ) - set(PLSSVM_TARGET_PLATFORMS_PYTHON_SCRIPT_REQUIRED_LIBS_ERROR_MESSAGE - "or manually define PLSSVM_TARGET_PLATFORMS (e.g. -DPLSSVM_TARGET_PLATFORMS=\"cpu;nvidia:sm_70,sm_86;amd:gfx906;intel:skl\"!" - ) - check_python_libs(${PLSSVM_TARGET_PLATFORMS_PYTHON_SCRIPT_REQUIRED_LIBS} ${PLSSVM_TARGET_PLATFORMS_PYTHON_SCRIPT_REQUIRED_LIBS_ERROR_MESSAGE}) - # run our `plssvm_target_platforms.py` script to determine the PLSSVM_TARGET_PLATFORMS string execute_process( COMMAND ${Python3_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/utility_scripts/plssvm_target_platforms.py" "--quiet" diff --git a/README.md b/README.md index 7325d3e00..b367cb9cf 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -![![PLSSVM](../resources/logo_245x150.png)](docs/resources/logo_245x150.png) +![PLSSVM](https://github.com/SC-SGS/PLSSVM/raw/main/docs/resources/logo_245x150.png) # PLSSVM - Parallel Least Squares Support Vector Machine @@ -12,6 +12,7 @@ - [Building PLSSVM](#building-plssvm) - [Running the Tests](#running-the-tests) - [Generating Test Coverage Results](#generating-test-coverage-results) + - [Automatic Source File Formatting](#automatic-source-file-formatting) - [Creating the Documentation](#creating-the-documentation) - [Installing](#installing) - [Usage](#usage) @@ -20,7 +21,7 @@ - [Predicting using `plssvm-predict`](#predicting-using-plssvm-predict) - [Data Scaling using `plssvm-scale`](#data-scaling-using-plssvm-scale) - [Example Code for PLSSVM Used as a Library](#example-code-for-plssvm-used-as-a-library) - - [Example Using the Python Bindings Available For PLSSVM](#example-using-the-python-bindings-available-for-plssvm) + - [Example Using the `sklearn` Python Bindings Available For PLSSVM](#example-using-the-sklearn-like-python-bindings-available-for-plssvm) - [Citing PLSSVM](#citing-plssvm) - [License](#license) @@ -34,7 +35,7 @@ To predict to which class a new, unseen data point belongs, the SVM simply has t This is very efficient since it only involves a single scalar 
product of the size corresponding to the number of features of the data set.

- Basic idea of an Support Vector Machine as classification model. + Basic idea of a Support Vector Machine as a classification model.

However, normal SVMs suffer in their potential parallelizability. @@ -79,9 +80,12 @@ The main highlights of our SVM implementations are: 5. Multi-class classification available via one vs. all (also one vs. rest or OAA) and one vs. one (also OAO): - OAA: one huge classification task where our CG algorithm solves a system of linear equations with multiple right-hand sides. The resulting model file is **not** compatible with LIBSVM. - OAO: constructs many but smaller binary classifications. The resulting model file is **fully** compatible with LIBSVM. -6. Multi-GPU support for **all** kernel functions and GPU backends for `fit` as well as `predict/score` (**note**: no multi-GPU support for the stdpar backend even if run on a GPU!). -7. Python bindings as drop-in replacement for `sklearn.SVC` (some features currently not implemented). +6. Also, support for the regression task. +7. Multi-GPU support for **all** kernel functions and GPU backends for `fit` as well as `predict/score` (**note**: no multi-GPU support for the stdpar backend even if run on a GPU!). +8. Python bindings as drop-in replacement for `sklearn.SVC` and `sklearn.SVR` (some features currently not implemented). +To see the full power of Support Vector Machines, have a look at our live visualization examples in +[examples/python/interactive](examples/python/interactive/README.md). 
## Getting Started @@ -144,7 +148,7 @@ Additional dependencies for the stdpar backend: - `AdaptiveCpp`: Intel's [TBB](https://github.com/wjakob/tbb) library - `GNU GCC`: [Boost ≥ 1.73.0](https://www.boost.org/) with the `atomic` library enabled and Intel's [TBB](https://github.com/wjakob/tbb) library -Additional dependencies if `PLSSVM_ENABLE_TESTING` and `PLSSVM_GENERATE_TEST_FILE` are both set to `ON`: +Additional dependencies if `PLSSVM_ENABLE_TESTING` and `PLSSVM_GENERATE_TEST_FILES` are both set to `ON`: - [Python3](https://www.python.org/) with the [`argparse`](https://docs.python.org/3/library/argparse.html), [`timeit`](https://docs.python.org/3/library/timeit.html), [`sklearn`](https://scikit-learn.org/stable/), and [`humanize`](https://pypi.org/project/humanize/) modules @@ -186,13 +190,9 @@ Valid targets are: - `intel`: compile for Intel GPUs; **at least one** architectural specification is necessary, e.g., `intel:skl` At least one of the above targets must be present. If the option `PLSSVM_TARGET_PLATFORMS` is not present, the targets -are automatically determined using the Python3 `utility_scripts/plssvm_target_platforms.py` script (required Python3 dependencies: -[`argparse`](https://docs.python.org/3/library/argparse.html), [`py-cpuinfo`](https://pypi.org/project/py-cpuinfo/), -[`GPUtil`](https://pypi.org/project/GPUtil/), [`pyamdgpuinfo`](https://pypi.org/project/pyamdgpuinfo/), and -[`pylspci`](https://pypi.org/project/pylspci/)). +are automatically determined using the Python3 `utility_scripts/plssvm_target_platforms.py` script. -Note that when using DPC++/icpx only a single architectural specification for `cpu`, `nvidia` or `amd` is allowed and that -automatically retrieving AMD GPU information on Windows is currently not supported due to `pyamdgpuinfo` limitations. +Note that when using DPC++/icpx only a single architectural specification for `cpu`, `nvidia` or `amd` is allowed. 
```bash @@ -202,8 +202,9 @@ python3 utility_scripts/plssvm_target_platforms.py --help usage: plssvm_target_platforms.py [-h] [--quiet] optional arguments: - -h, --help show this help message and exit - --quiet only output the final PLSSVM_TARGET_PLATFORMS string + -h, --help show this help message and exit + --quiet only output the final PLSSVM_TARGET_PLATFORMS string + --gpus_only only output gpu architectures to the final PLSSVM_TARGET_PLATFORMS string ``` Example invocation: @@ -212,10 +213,9 @@ Example invocation: python3 utility_scripts/plssvm_target_platforms.py ``` ``` -Intel(R) Core(TM) i9-10980XE CPU @ 3.00GHz: {'avx512': True, 'avx2': True, 'avx': True, 'sse4_2': True} +supported CPU SIMD flags: {'avx512': True, 'avx2': True, 'avx': True, 'sse4_2': True} -Found 1 NVIDIA GPU(s): - 1x NVIDIA GeForce RTX 3080: sm_86 +Found 1 NVIDIA GPU(s): [sm_86] Possible -DPLSSVM_TARGET_PLATFORMS entries: cpu:avx512;nvidia:sm_86 @@ -229,13 +229,11 @@ or with the `--quiet` flag provided: python3 utility_scripts/plssvm_target_platforms.py --quiet ``` ``` -cpu:avx512;intel:dg1 +cpu:avx512;nvidia:sm_86 ``` If the architectural information for the requested GPU could not be retrieved, one option would be to have a look at: -- for NVIDIA GPUs: [Your GPU Compute Capability](https://developer.nvidia.com/cuda-gpus) -- for AMD GPUs: [clang AMDGPU backend usage](https://llvm.org/docs/AMDGPUUsage.html) - for Intel GPUs and CPUs: [Ahead of Time Compilation](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-dpcpp-cpp-compiler-dev-guide-and-reference/top/compilation/ahead-of-time-compilation.html) and [Intel graphics processor table](https://dgpu-docs.intel.com/devices/hardware-table.html) @@ -301,9 +299,9 @@ The `[optional_options]` can be one or multiple of: If `PLSSVM_ENABLE_TESTING` is set to `ON`, the following option can also be set: -- `PLSSVM_GENERATE_TEST_FILE=ON|OFF` (default: `ON`): automatically generate test files +- `PLSSVM_GENERATE_TEST_FILES=ON|OFF` 
(default: `ON`): automatically generate test files -If `PLSSVM_GENERATE_TEST_FILE` is set to `ON`, the following options can also be set: +If `PLSSVM_GENERATE_TEST_FILES` is set to `ON`, the following options can also be set: - `PLSSVM_TEST_FILE_NUM_DATA_POINTS` (default: `5000`): the number of data points in the test file - `PLSSVM_TEST_FILE_NUM_FEATURES` (default: `2000`): the number of features per data point in the test file @@ -322,10 +320,6 @@ If `PLSSVM_ENABLE_LANGUAGE_BINDINGS` is set to `ON`, the following option can al - `PLSSVM_ENABLE_PYTHON_BINDINGS=ON|OFF` (default: `PLSSVM_ENABLE_LANGUAGE_BINDINGS`): enable Python bindings using Pybind11; **note:** `PLSSVM_ENABLE_LANGUAGE_BINDINGS` must be set that this option has any effect -If `PLSSVM_ENABLE_PYTHON_BINDINGS` is set to `ON`, the following options can also be set: - -- `PLSSVM_PYTHON_BINDINGS_PREFERRED_LABEL_TYPE` (default: `std::string`): the default `label_type` used if the generic `plssvm.Model` and `plssvm.DataSet` Python classes are used - If the OpenCL backend is available and NVIDIA GPUs should be targeted, an additional option can be set. - `PLSSVM_OPENCL_BACKEND_ENABLE_PTX_INLINE_ASSEMBLY=ON|OFF` (default: `ON`): enable PTX inline assembly to speed up the FP32/FP64 atomicAdd implementations on NVIDIA GPUs. **Note:** requires `sm_60` or newer! 
@@ -500,6 +494,8 @@ The documentation of the current state of the main branch can be found [here](ht ### Installing +#### Install via CMake + The library supports the `install` target: ```bash @@ -517,6 +513,48 @@ export LD_LIBRARY_PATH=${CMAKE_INSTALL_PREFIX}/lib:${CMAKE_INSTALL_PREFIX}/lib64 export CPLUS_INCLUDE_PATH=${CMAKE_INSTALL_PREFIX}/include:${CPLUS_INCLUDE_PATH} ``` +If our library was built with the Python bindings enabled, the `PYTHONPATH` must additionally be set: + +```bash +export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${CMAKE_INSTALL_PREFIX}/lib64:${PYTHONPATH} +``` + +#### Install via pip + +We also support a pip package that can be used to install our library: + +```bash +pip install plssvm +``` + +This pip install behaves **as if** the CMake `all_python` preset is used. +This means that the `PLSSVM_TARGET_PLATFORMS` are automatically determined and PLSSVM is built with all supported +backends that are available on the target machine at the point of the `pip install plssvm` invocation. +To check the installation, including, e.g., the installed backends, we provide the `plssvm-install-check` command after +PLSSVM has been installed via pip. +An example output of this command can look like: + +```text +PLSSVM - Parallel Least Squares Support Vector Machine (3.0.0) + +Copyright(C) 2018-today The PLSSVM project - All Rights Reserved +This is free software distributed under the MIT license.
+ +Available target platforms: TargetPlatform.AUTOMATIC, TargetPlatform.GPU_NVIDIA, TargetPlatform.CPU +Default target platform: TargetPlatform.GPU_NVIDIA + +Available backends: BackendType.AUTOMATIC, BackendType.OPENMP, BackendType.CUDA, BackendType.OPENCL, BackendType.SYCL +Default backend for target platform TargetPlatform.GPU_NVIDIA: BackendType.CUDA +Default backend for target platform TargetPlatform.CPU: BackendType.SYCL + +Available SYCL implementations: ImplementationType.AUTOMATIC, ImplementationType.ADAPTIVECPP + + +Repository: https://github.com/SC-SGS/PLSSVM.git +Documentation: https://sc-sgs.github.io/PLSSVM +Issues: https://github.com/SC-SGS/PLSSVM/issues +``` + ## Usage PLSSVM provides three executables: `plssvm-train`, `plssvm-predict`, and `plssvm-scale`. @@ -525,7 +563,7 @@ For more information, see the respective `man` pages which are installed via `cm ### Generating Artificial Data -The repository comes with a Python3 script (in the `utility_scripts/` directory) to simply generate arbitrarily large data sets. +The repository comes with a Python3 script (in the `utility_scripts/` directory) to simply generate arbitrarily large classification and regression data sets. In order to use all functionality, the following Python3 modules must be installed: [`argparse`](https://docs.python.org/3/library/argparse.html), [`timeit`](https://docs.python.org/3/library/timeit.html), @@ -535,26 +573,58 @@ In order to use all functionality, the following Python3 modules must be install and [`humanize`](https://pypi.org/project/humanize/). ``` -usage: generate_data.py [-h] [--output OUTPUT] [--format FORMAT] [--problem PROBLEM] --samples SAMPLES [--test_samples TEST_SAMPLES] --features FEATURES [--classes CLASSES] [--plot] +usage: generate_data.py [-h] [--output OUTPUT] [--format FORMAT] --samples SAMPLES [--test_samples TEST_SAMPLES] --features FEATURES [--scale SCALE SCALE] [--plot] {classification,regression} ... 
-options: - -h, --help show this help message and exit +positional arguments: + {classification,regression} + classification create a classification data set + regression create regression data set + +optional arguments: + -h, -?, --help show this help message and exit --output OUTPUT the output file to write the samples to (without extension) --format FORMAT the file format; either arff, libsvm, or csv - --problem PROBLEM the problem to solve; one of: blobs, blobs_merged, planes, ball --samples SAMPLES the number of training samples to generate --test_samples TEST_SAMPLES the number of test samples to generate; default: 0 --features FEATURES the number of features per data point - --classes CLASSES the number of classes to generate; default: 2 + --scale SCALE SCALE scale the features to the provided range --plot plot training samples; only possible if 0 < samples <= 2000 and 1 < features <= 3 + + +classification specific arguments: + +usage: generate_data.py classification [-h] [--problem {blobs,blobs_merged,planes,ball}] [--classes CLASSES] + +optional arguments: + -h, --help show this help message and exit + --problem {blobs,blobs_merged,planes,ball} + the problem to solve + --classes CLASSES the number of classes to generate; default: 2 + + +regression specific arguments: + +usage: generate_data.py regression [-h] [--problem {linear,linear_noisy,friedman1}] + +optional arguments: + -h, --help show this help message and exit + --problem {linear,linear_noisy,friedman1} + the problem to solve + ``` -An example invocation generating a data set consisting of blobs with 1000 data points with 200 features each and +An example invocation generating a classification data set consisting of blobs with 1000 data points with 200 features each and 4 classes could look like: ```bash -python3 generate_data.py --output data_file --format libsvm --problem blobs --samples 1000 --features 200 --classes 4 +python3 generate_data.py --output data_file --format libsvm
--samples 1000 --features 200 classification --problem blobs --classes 4 +``` + +An example invocation generating a linear regression data set consisting of 1000 data points with 200 features each could look like: + +```bash +python3 generate_data.py --output data_file --format libsvm --samples 1000 --features 200 regression --problem linear ``` ### Training using `plssvm-train` @@ -567,6 +637,9 @@ LS-SVM with multiple (GPU-)backends Usage: ./plssvm-train [OPTION...] training_set_file [model_file] + -s, --svm_type arg set type of SVM + 0 -- C-SVC + 1 -- C-SVR (default: 0) -t, --kernel_type arg set type of kernel function. 0 -- linear: u'*v 1 -- polynomial: (gamma*u'*v+coef0)^degree @@ -575,10 +648,10 @@ Usage: 4 -- laplacian: exp(-gamma*|u-v|_1) 5 -- chi_squared: exp(-gamma*sum_i((x[i]-y[i])^2/(x[i]+y[i]))) (default: 2) -d, --degree arg set degree in kernel function (default: 3) - -g, --gamma arg set gamma in kernel function (default: automatic) + -g, --gamma arg set gamma in kernel function (default: "1 / num_features") -r, --coef0 arg set coef0 in kernel function (default: 0) -c, --cost arg set the parameter C (default: 1) - -e, --epsilon arg set the tolerance of termination criterion (default: 0.001) + -e, --epsilon arg set the tolerance of termination criterion (default: 1e-10) -i, --max_iter arg set the maximum number of CG iterations (default: num_features) -l, --solver arg choose the solver: automatic|cg_explicit|cg_implicit (default: automatic) -a, --classification arg the classification strategy to use for multi-class classification: oaa|oao (default: oaa) @@ -651,7 +724,36 @@ If the `--kokkos_execution_space` is `automatic`, uses the best fitting executio ### Predicting using `plssvm-predict` Our predict utility is fully conform to LIBSVM's model files. -This means that our `plssvm-predict` can be used on model files learned with, e.g., LIBSVM's `svm-train`. +This means that our `plssvm-predict` can be used on model files learned with, e.g., LIBSVM's `svm-train`.
+Note: this is not the case for the regression task since the `svm_type` field differs between LIBSVM (`epsilon_svr`) +and PLSSVM (`c_svr`). To automatically convert between the two, simply use the `convert_model.py` script +(in the `utility_scripts/` directory) which simply replaces these fields with the respectively expected one +(note that for large files doing that manually may be faster): + +```bash +usage: convert_model.py [-h] [-o OUTPUT] [--to_plssvm] [--to_libsvm] model_file + +positional arguments: + model_file the regression model file to convert + +options: + -h, --help show this help message and exit + -o OUTPUT, --output OUTPUT + output the regression model to the new file, otherwise the regression model us updated inplace + --to_plssvm convert the regression model to a PLSSVM conform model file + --to_libsvm convert the regression model to a LIBSVM conform model file +``` + +An example invocation could look like: + +```bash +python3 convert_model.py --to_libsvm -o 5x4_libsvm.libsvm.model 5x4.libsvm.model +``` +``` +Converting a PLSSVM model file to a LIBSVM model file.
+``` + +After a correct model file exists, predict works as follows: ```bash ./plssvm-predict --help @@ -734,7 +836,7 @@ An example invocation to scale a train and test file in the same way looks like: ### Example Code for PLSSVM Used as a Library -A simple C++ program (`main.cpp`) using PLSSVM as library could look like: +A simple C++ program (`main_classification.cpp`) using PLSSVM as library for classification could look like: ```cpp #include "plssvm/core.hpp" @@ -748,26 +850,26 @@ int main() { plssvm::environment::scope_guard environment_guard{}; try { - // create a new C-SVM parameter set, explicitly overriding the default kernel function + // create a new C-SVC parameter set, explicitly overriding the default kernel function const plssvm::parameter params{ plssvm::kernel_type = plssvm::kernel_function_type::polynomial }; // create two data sets: one with the training data scaled to [-1, 1] // and one with the test data scaled like the training data - const plssvm::data_set train_data{ "train_file.libsvm", { -1.0, 1.0 } }; - const plssvm::data_set test_data{ "test_file.libsvm", train_data.scaling_factors()->get() }; + const plssvm::classification_data_set train_data{ "train_file.libsvm", { -1.0, 1.0 } }; + const plssvm::classification_data_set test_data{ "test_file.libsvm", train_data.scaling_factors()->get() }; - // create C-SVM using the default backend and the previously defined parameter - const auto svm = plssvm::make_csvm(params); + // create C-SVC using the default backend and the previously defined parameter + const auto svc = plssvm::make_csvc(params); // fit using the training data, (optionally) set the termination criterion - const plssvm::model model = svm->fit(train_data, plssvm::epsilon = 10e-6); + const plssvm::classification_model model = svc->fit(train_data, plssvm::epsilon = 1e-6); // get accuracy of the trained model - const double model_accuracy = svm->score(model); + const double model_accuracy = svc->score(model); std::cout << "model 
accuracy: " << model_accuracy << std::endl; // predict the labels - const std::vector predicted_label = svm->predict(model, test_data); + const std::vector predicted_label = svc->predict(model, test_data); // output a more complete classification report const std::vector &correct_label = test_data.labels().value(); std::cout << plssvm::classification_report{ correct_label, predicted_label } << std::endl; @@ -784,10 +886,60 @@ int main() { } ``` +A simple C++ program (`main_regression.cpp`) using PLSSVM as library for regression could look like: + +```cpp +#include "plssvm/core.hpp" + +#include <exception> +#include <iostream> +#include <vector> + +int main() { + // correctly initialize and finalize environments + plssvm::environment::scope_guard environment_guard{}; + + try { + // create a new C-SVR parameter set, explicitly overriding the default kernel function + const plssvm::parameter params{ plssvm::kernel_type = plssvm::kernel_function_type::polynomial }; + + // create two data sets: one with the training data scaled to [-1, 1] + // and one with the test data scaled like the training data + const plssvm::regression_data_set train_data{ "train_file.libsvm", { -1.0, 1.0 } }; + const plssvm::regression_data_set test_data{ "test_file.libsvm", train_data.scaling_factors()->get() }; + + // create C-SVR using the default backend and the previously defined parameter + const auto svr = plssvm::make_csvr(params); + + // fit using the training data, (optionally) set the termination criterion + const plssvm::regression_model model = svr->fit(train_data, plssvm::epsilon = 1e-6); + + // get accuracy of the trained model + const double model_accuracy = svr->score(model); + std::cout << "model accuracy: " << model_accuracy << std::endl; + + // predict the labels + const std::vector predicted_values = svr->predict(model, test_data); + // output a more complete regression report + const std::vector &correct_values = test_data.labels().value(); + std::cout << plssvm::regression_report{ correct_values,
predicted_values } << std::endl; + + // write model file to disk + model.save("model_file.libsvm"); + } catch (const plssvm::exception &e) { + std::cerr << e.what_with_loc() << std::endl; + } catch (const std::exception &e) { + std::cerr << e.what() << std::endl; + } + + return 0; +} +``` + With a corresponding minimal CMake file: ```cmake -cmake_minimum_required(VERSION 3.16) +cmake_minimum_required(VERSION 3.25) project(LibraryUsageExample LANGUAGES CXX) @@ -796,7 +948,8 @@ find_package(plssvm REQUIRED) # CMake's COMPONENTS mechanism can also be used if a specific library component is required, e.g.: # find_package(plssvm REQUIRED COMPONENTS CUDA) -add_executable(prog main.cpp) +add_executable(classification main_classification.cpp) +add_executable(regression main_regression.cpp) target_compile_features(prog PUBLIC cxx_std_17) target_link_libraries(prog PUBLIC plssvm::all) @@ -804,58 +957,157 @@ target_link_libraries(prog PUBLIC plssvm::all) # target_link_libraries(prog PUBLIC plssvm::cuda) ``` -### Example Using the Python Bindings Available For PLSSVM +### Example Using the `sklearn` like Python Bindings Available For PLSSVM -Roughly the same can be achieved using our Python bindings with the following Python script (note: needs [`sklearn`](https://scikit-learn.org/stable/)): +A classification example using PLSSVM's `SVC` Python binding and sklearn's breast cancer data set: ```python -import plssvm -from sklearn.metrics import classification_report - -try: - # create a new C-SVM parameter set, explicitly overriding the default kernel function - params = plssvm.Parameter(kernel_type=plssvm.KernelFunctionType.POLYNOMIAL) - - # create two data sets: one with the training data scaled to [-1, 1] - # and one with the test data scaled like the training data - train_data = plssvm.DataSet("train_data.libsvm", scaling=(-1.0, 1.0)) - test_data = plssvm.DataSet("test_data.libsvm", scaling=train_data.scaling_factors()) - - # create C-SVM using the default backend and the
previously defined parameter - svm = plssvm.CSVM(params) - - # fit using the training data, (optionally) set the termination criterion - model = svm.fit(train_data, epsilon=10e-6) - - # get accuracy of the trained model - model_accuracy = svm.score(model) - print("model accuracy: {}".format(model_accuracy)) - - # predict labels - predicted_label = svm.predict(model, test_data) - # output a more complete classification report - correct_label = test_data.labels() - print(classification_report(correct_label, predicted_label)) - - # write model file to disk - model.save("model_file.libsvm") -except plssvm.PLSSVMError as e: - print(e) -except RuntimeError as e: - print(e) -``` - -**Note:** it may be necessary to set the environment variable `PYTHONPATH` to the `lib` folder in the PLSSVM install path. +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +######################################################################################################################## +# Authors: Alexander Van Craen, Marcel Breyer # +# Copyright (C): 2018-today The PLSSVM project - All Rights Reserved # +# License: This file is part of the PLSSVM project which is released under the MIT license. # +# See the LICENSE.md file in the project root for full license information. 
# +######################################################################################################################## + +import matplotlib.pyplot as plt +import sklearn.datasets +import sklearn.metrics +import sklearn.inspection +import numpy as np +from plssvm import SVC # identical to from sklearn.svm import SVC + +# load the breast cancer datasets +cancer = sklearn.datasets.load_breast_cancer() +X = cancer.data[:, :2] +y = cancer.target +y_label = cancer.target_names + +# build the SVC model +svm = SVC(kernel="rbf", gamma=0.5, C=1.0).fit(X, y) + +# score the model +print(sklearn.metrics.classification_report(y, svm.predict(X))) +print("Score: {:.2f}%".format(svm.score(X, y) * 100)) + +# plot the decision boundary +sklearn.inspection.DecisionBoundaryDisplay.from_estimator( + svm, + X, + response_method="predict", + cmap=plt.cm.Spectral, + alpha=0.8, + xlabel=cancer.feature_names[0], + ylabel=cancer.feature_names[1], +) + +# scatter plot the decision boundary +viridis = plt.cm.get_cmap('viridis', len(np.unique(y))) +plt.scatter(X[:, 0], X[:, 1], + cmap=viridis, + c=y, + s=20, edgecolors="k") + +# generate legend handles and add handle +legend_handles = [plt.scatter([], [], color=viridis(color), label=f'{label}') + for label, color in zip(y_label, np.unique(y))] +plt.legend(handles=legend_handles) + +plt.title("SVC classifier on breast cancer dataset") +plt.show() +``` +with an example output: +```text + precision recall f1-score support -```bash -export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${CMAKE_INSTALL_PREFIX}/lib64:${PYTHONPATH} + 0 0.91 0.85 0.88 212 + 1 0.91 0.95 0.93 357 + + accuracy 0.91 569 + macro avg 0.91 0.90 0.91 569 +weighted avg 0.91 0.91 0.91 569 + +Score: 91.39% +``` +

+ Example classification task breast cancer decision boundary output. +

+ +A regression example comparing PLSSVM's `SVR` Python binding and `sklearn.SVR` using a sine curve: + +```python +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +######################################################################################################################## +# Authors: Alexander Van Craen, Marcel Breyer # +# Copyright (C): 2018-today The PLSSVM project - All Rights Reserved # +# License: This file is part of the PLSSVM project which is released under the MIT license. # +# See the LICENSE.md file in the project root for full license information. # +######################################################################################################################## + +import numpy as np +import matplotlib.pyplot as plt + +# generate sample data (sine curve with noise) +X = np.sort(5 * np.random.rand(40, 1), axis=0) +y = np.sin(X).ravel() + +# add noise to targets +y[::5] += 3 * (0.5 - np.random.rand(8)) + +plt.scatter(X, y, color='darkorange', label='data') + +# fit the sklearn regression model +from sklearn.svm import SVR + +sklearn_svr_lin = SVR(kernel='linear', C=100, epsilon=0.1) +y_lin_sklearn = sklearn_svr_lin.fit(X, y).predict(X) +plt.plot(X, y_lin_sklearn, lw=2, linestyle='dashed', label='Linear model sklearn') + +sklearn_svr_poly = SVR(kernel='poly', C=100, degree=3, epsilon=0.1, coef0=1) +y_poly_sklearn = sklearn_svr_poly.fit(X, y).predict(X) +plt.plot(X, y_poly_sklearn, lw=2, linestyle='dashed', label='Polynomial model sklearn') + +sklearn_svr_rbf = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.1) +y_rbf_sklearn = sklearn_svr_rbf.fit(X, y).predict(X) +plt.plot(X, y_rbf_sklearn, lw=2, linestyle='dashed', label='RBF model sklearn') + +# fit the PLSSVM regression model +from plssvm import SVR + +plssvm_svr_lin = SVR(kernel='linear', C=100) +y_lin_plssvm = plssvm_svr_lin.fit(X, y).predict(X) +plt.plot(X, y_lin_plssvm, lw=2, label='Linear model plssvm') + +plssvm_svr_poly = SVR(kernel='poly', C=100, degree=3, coef0=1) 
+y_poly_plssvm = plssvm_svr_poly.fit(X, y).predict(X) +plt.plot(X, y_poly_plssvm, lw=2, label='Polynomial model plssvm') + +plssvm_svr_rbf = SVR(kernel='rbf', C=100, gamma=0.1) +y_rbf_plssvm = plssvm_svr_rbf.fit(X, y).predict(X) +plt.plot(X, y_rbf_plssvm, lw=2, label='RBF model plssvm') + +# show the result plots +plt.xlabel('data') +plt.ylabel('target') +plt.title('Support Vector Regression') +plt.legend() +plt.show() ``` +with an example output: +

+ Example regression output using a sine curve. +

-We also provide Python bindings for a `plssvm.SVC` class that offers the same interface as the [`sklearn.svm.SVC`](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html) class. -Note that currently not all functionality has been implemented in PLSSVM. +Note that currently not all sklearn `SVC` and `SVR` functionality has been implemented in PLSSVM. The respective functions will throw a Python `AttributeError` if called. For a detailed overview of the functions that are currently implemented, see [our API documentation](bindings/Python/README.md). +There are more examples located in the `examples/python/sklearn` directory that are copied from the sklearn repository and slightly changed for PLSSVM. + ## Citing PLSSVM If you use PLSSVM in your research, we kindly request you to cite: diff --git a/bindings/Python/CMakeLists.txt b/bindings/Python/CMakeLists.txt index fea4a0998..54f967f2a 100644 --- a/bindings/Python/CMakeLists.txt +++ b/bindings/Python/CMakeLists.txt @@ -35,18 +35,26 @@ set(PLSSVM_PYTHON_BINDINGS_SOURCES ${CMAKE_CURRENT_LIST_DIR}/version/version.cpp ${CMAKE_CURRENT_LIST_DIR}/backend_types.cpp ${CMAKE_CURRENT_LIST_DIR}/classification_types.cpp - ${CMAKE_CURRENT_LIST_DIR}/csvm.cpp - ${CMAKE_CURRENT_LIST_DIR}/data_set.cpp + ${CMAKE_CURRENT_LIST_DIR}/svm/csvm.cpp + ${CMAKE_CURRENT_LIST_DIR}/svm/csvc.cpp + ${CMAKE_CURRENT_LIST_DIR}/svm/csvr.cpp + ${CMAKE_CURRENT_LIST_DIR}/data_set/classification_data_set.cpp + ${CMAKE_CURRENT_LIST_DIR}/data_set/min_max_scaler.cpp + ${CMAKE_CURRENT_LIST_DIR}/data_set/regression_data_set.cpp ${CMAKE_CURRENT_LIST_DIR}/file_format_types.cpp ${CMAKE_CURRENT_LIST_DIR}/gamma.cpp ${CMAKE_CURRENT_LIST_DIR}/kernel_function_types.cpp ${CMAKE_CURRENT_LIST_DIR}/kernel_functions.cpp - ${CMAKE_CURRENT_LIST_DIR}/model.cpp + ${CMAKE_CURRENT_LIST_DIR}/model/classification_model.cpp + ${CMAKE_CURRENT_LIST_DIR}/model/regression_model.cpp ${CMAKE_CURRENT_LIST_DIR}/parameter.cpp + ${CMAKE_CURRENT_LIST_DIR}/regression_report.cpp 
${CMAKE_CURRENT_LIST_DIR}/solver_types.cpp + ${CMAKE_CURRENT_LIST_DIR}/svm_types.cpp ${CMAKE_CURRENT_LIST_DIR}/target_platforms.cpp ${CMAKE_CURRENT_LIST_DIR}/verbosity_levels.cpp - ${CMAKE_CURRENT_LIST_DIR}/sklearn.cpp + ${CMAKE_CURRENT_LIST_DIR}/sklearn_svc.cpp + ${CMAKE_CURRENT_LIST_DIR}/sklearn_svr.cpp ${CMAKE_CURRENT_LIST_DIR}/main.cpp ) @@ -102,28 +110,6 @@ endif () set(PLSSVM_PYTHON_BINDINGS_LIBRARY_NAME plssvm) pybind11_add_module(${PLSSVM_PYTHON_BINDINGS_LIBRARY_NAME} ${PLSSVM_PYTHON_BINDINGS_SOURCES}) -# set default label type -set(PLSSVM_PYTHON_BINDINGS_POSSIBLE_LABEL_TYPE - "bool;char;signed char;unsigned char;short;unsigned short;int;unsigned int;long;unsigned long;long long;unsigned long long;float;double;long double;std::string" -) -set(PLSSVM_PYTHON_BINDINGS_PREFERRED_LABEL_TYPE "std::string" CACHE STRING "The preferred type of the labels for the Python bindings.") -set_property(CACHE PLSSVM_PYTHON_BINDINGS_PREFERRED_LABEL_TYPE PROPERTY STRINGS ${PLSSVM_PYTHON_BINDINGS_POSSIBLE_LABEL_TYPE}) -if (NOT "${PLSSVM_PYTHON_BINDINGS_PREFERRED_LABEL_TYPE}" IN_LIST PLSSVM_PYTHON_BINDINGS_POSSIBLE_LABEL_TYPE) - message( - FATAL_ERROR - "The provided label_type \"${PLSSVM_PYTHON_BINDINGS_PREFERRED_LABEL_TYPE}\" is not one of the allowed values: \"${PLSSVM_PYTHON_BINDINGS_POSSIBLE_LABEL_TYPE}\"" - ) -endif () -message(STATUS "The preferred label_type for the Python bindings is \"${PLSSVM_PYTHON_BINDINGS_PREFERRED_LABEL_TYPE}\".") - -# add necessary compile definitions for the default real_type and label_type -target_compile_definitions( - ${PLSSVM_PYTHON_BINDINGS_LIBRARY_NAME} PRIVATE PLSSVM_PYTHON_BINDINGS_PREFERRED_LABEL_TYPE=${PLSSVM_PYTHON_BINDINGS_PREFERRED_LABEL_TYPE} -) -if (PLSSVM_PYTHON_BINDINGS_PREFERRED_LABEL_TYPE STREQUAL "std::string") - target_compile_definitions(${PLSSVM_PYTHON_BINDINGS_LIBRARY_NAME} PRIVATE PLSSVM_PYTHON_BINDINGS_LABEL_TYPE_IS_STRING) -endif () - # add necessary compile options 
target_include_directories(${PLSSVM_PYTHON_BINDINGS_LIBRARY_NAME} PRIVATE ${CMAKE_CURRENT_LIST_DIR}/../..) target_link_libraries(${PLSSVM_PYTHON_BINDINGS_LIBRARY_NAME} PRIVATE ${PLSSVM_ALL_LIBRARY_NAME}) @@ -135,3 +121,13 @@ target_compile_options(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC -fPIC) # append pybind11 bindings library to installed targets append_local_and_parent(PLSSVM_TARGETS_TO_INSTALL ${PLSSVM_PYTHON_BINDINGS_LIBRARY_NAME}) + +# install necessary Python files to make pip install plssvm work correctly: +# +# - __init__.py: PLSSVM is correctly recognized as Python package +# - __cli__.py: PLSSVM's executables are correctly usable +# - __install_check__.py: custom script outputting some PLSSVM build information +include(GNUInstallDirs) +install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/__init__.py" "${CMAKE_CURRENT_SOURCE_DIR}/__cli__.py" "${CMAKE_CURRENT_SOURCE_DIR}/__install_check__.py" + DESTINATION "${CMAKE_INSTALL_LIBDIR}" +) diff --git a/bindings/Python/README.md b/bindings/Python/README.md index 04d0cee14..fdb812fbd 100644 --- a/bindings/Python/README.md +++ b/bindings/Python/README.md @@ -1,6 +1,10 @@ # The Python3 Bindings -- [Sklearn like API](#sklearn-like-api) +- [Sklearn like API for sklearn.svm.SVC](#sklearn-like-api-for-sklearnsvmsvc) + - [Parameters](#parameters) + - [Attributes](#attributes) + - [Methods](#methods) +- [Sklearn like API for sklearn.svm.SVR](#sklearn-like-api-for-sklearnsvmsvr) - [Parameters](#parameters) - [Attributes](#attributes) - [Methods](#methods) @@ -8,10 +12,11 @@ - [Enumerations](#enumerations) - [Classes and submodules](#classes-and-submodules) - [plssvm.Parameter](#plssvmparameter) - - [plssvm.DataSet](#plssvmdataset) - - [plssvm.CSVM](#plssvmcsvm) - - [plssvm.openmp.CSVM, plssvm.hpx.CSVM, plssvm.stdpar.CSVM, plssvm.cuda.CSVM, plssvm.hip.CSVM, plssvm.opencl.CSVM, plssvm.sycl.CSVM, plssvm.dpcpp.CSVM, plssvm.adaptivecpp.CSVM, 
plssvm.kokkos.CSVM](#plssvmopenmpcsvm-plssvmhpxcsvm-plssvmcudacsvm-plssvmhipcsvm-plssvmopenclcsvm-plssvmsyclcsvm-plssvmdpcppcsvm-plssvmadaptivecppcsvm-plssvmkokkoscsvm) - - [plssvm.Model](#plssvmmodel) + - [plssvm.ClassificationDataSet and plssvm.RegressionDataSet](#plssvmclassificationdataset-and-plssvmregressiondataset) + - [plssvm.MinMaxScaler](#plssvmminmaxscaler) + - [plssvm.CSVC and plssvm.CSVR](#plssvmcsvc-and-plssvmcsvr) + - [The backend C-SVCs and C-SVRs](#the-backend-c-svcs-and-c-svrs) + - [plssvm.ClassificationModel and plssvm.RegressionModel](#plssvmclassificationmodel-and-plssvmregressionmodel) - [plssvm.Version](#plssvmversion) - [plssvm.detail.tracking.PerformanceTracker](#plssvmdetailtrackingperformancetracker) - [plssvm.detail.tracking.Events](#plssvmdetailtrackingevent-plssvmdetailtrackingevents) @@ -19,13 +24,14 @@ - [Exceptions](#exceptions) We currently support two kinds of Python3 bindings, one reflecting the API -of [`sklearn.svm.SVC`](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html) and one extremely closely +of [`sklearn.svm.SVC`](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html) and [ +`sklearn.svm.SVR`](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html) and one extremely closely to our C++ API. **Note**: this page is solely meant as an API reference and overview. For examples see the top-level [`../../examples/`](/examples) folder. -## Sklearn like API +## Sklearn like API for `sklearn.svm.SVC` The following tables show the API provided by [`sklearn.svm.SVC`](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html) and whether we currently @@ -46,9 +52,9 @@ new `SVC`: | :white_check_mark: | `degree : int, default=3` | Degree of the polynomial kernel function (‘poly’). Must be non-negative. Ignored by all other kernels. | | :white_check_mark: | `gamma : {'scale', 'auto'} or real_type, default='scale'` | Kernel coefficient for various kernel functions. 
**Note**: the default in PLSSVM is 'auto'. | | :white_check_mark: | `coef0 : real_type, default=0.0` | Independent term in kernel function. It is only significant in 'poly' or 'sigmoid'. | -| :x: | `shrinking : bool, default=False` | Whether to use the shrinking heuristic. **Note**: not supported, therefore, the default is set to `False` | +| :x: | `shrinking : bool, default=False` | Whether to use the shrinking heuristic. **Note**: not supported and makes no sense for a LS-SVM, therefore, the default is set to `False`. | | :x: | `probability : bool, default=False` | Whether to enable probability estimates. | -| :white_check_mark: | `tol : real_type, default=1e-3` | Tolerance for stopping criterion. **Note**: in PLSSVM, this is equal to the (relative) epsilon used in the CG algorithm and, therefore, other values may be necessary than for `sklearn.SVC` SVM implementation. | +| :white_check_mark: | `tol : real_type, default=1e-10` | Tolerance for stopping criterion. **Note**: in PLSSVM, this is equal to the (relative) epsilon used in the CG algorithm and, therefore, other values may be necessary than for `sklearn.SVC` SVM implementation. | | :x: | `cache_size : real_type, default=0` | Specify the size of the kernel cache (in MB). **Note**: not applicable in PLSSVM. | | :x: | `class_weight : dict or 'balanced, default=None` | Set the parameter C of class i to class_weight[i]*C for SVC. If not given, all classes are supposed to have weight one. | | :white_check_mark: | `verbose : bool, default=False` | Enable verbose output. **Note**: if set to True, more information will be displayed than it would be the case with LIBSVM (and, therefore, `sklearn.svm.SVC`). | @@ -65,23 +71,23 @@ they were made available during PLSSVM's build step. 
The following attributes are supported by [`sklearn.svm.SVC`](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html): -| implementation status | attribute | sklearn description | -|:---------------------:|-----------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| :white_check_mark: | `class_weight_ : ndarray of shape (n_classes,)` | Multipliers of parameter C for each class. Computed based on the `class_weight` parameter. **Note**: returns all `1.0` since the `class_weight` parameter is currently not supported. | -| :white_check_mark: | `classes_ : ndarray of shape (n_classes,)` | The classes labels. | -| :x: | `coef_ : ndarray of shape (n_classes * (n_classes - 1) / 2, n_features)` | Weights assigned to the features when `kernel="linear"`. | -| :x: | `dual_coef_ : ndarray of shape (n_classes -1, n_SV)` | Dual coefficients of the support vector in the decision function, multiplied by their targets. | -| :white_check_mark: | `fit_status_ : int` | 0 if correctly fitted, 1 otherwise (will raise warning). | -| :x: | `intercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)` | Constants in decision function. | -| :white_check_mark: | `n_features_in_ : int` | Number of features seen during `fit`. | -| :x: | `feature_names_in_ : ndarray of shape (n_features_in_,)` | Names of features seen during `fit`. | -| :white_check_mark: | `n_iter_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)` for 'ovo' and ndarray of shape (n_classes,) for 'ovr' | Number of iterations run by the optimization routine to fit the model. 
The shape of this attribute depends on the number of models optimized which in turn depends on the number of classes and decision function. **Note**: for 'ovr' the values correspond to the number of CG iterations necessary for each right-hand side (i.e., class) to converge. | -| :white_check_mark: | `support_ : ndarray of shape (n_SV)` | Indices of support vectors. | -| :white_check_mark: | `support_vectors_ : ndarray of shape (n_SV, n_features)` | Support vectors. | -| :white_check_mark: | `n_support_ : ndarray of shape (n_classes,), dtype=int32` | Number of support vectors for each class. | -| :x: | `probA_ : ndarray of shape (n_classes * (n_classes - 1) / 2)` | Parameter learned in Platt scaling when `probability=True`. | -| :x: | `probB_ : ndarray of shape (n_classes * (n_classes - 1) / 2)` | Parameter learned in Platt scaling when `probability=True`. | -| :white_check_mark: | `shape_fit_ : tuple of int of shape (n_dimensions_of_X,)` | Array dimensions of training vector `X`. | +| implementation status | attribute | sklearn description | +|:---------------------:|------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| :white_check_mark: | `class_weight_ : ndarray of shape (n_classes,)` | Multipliers of parameter C for each class. Computed based on the `class_weight` parameter. **Note**: returns all `1.0` since the `class_weight` parameter is currently not supported. | +| :white_check_mark: | `classes_ : ndarray of shape (n_classes,)` | The classes labels. 
| +| :white_check_mark: | `coef_ : ndarray of shape (n_classes * (n_classes - 1) / 2, n_features) for ovo, ndarray of shape (n_classes, n_features) for ovr` | Weights assigned to the features when `kernel="linear"`. | +| :x: | `dual_coef_ : ndarray of shape (n_classes -1, n_SV)` | Dual coefficients of the support vector in the decision function, multiplied by their targets. | +| :white_check_mark: | `fit_status_ : int` | 0 if correctly fitted, 1 otherwise (will raise warning). | +| :white_check_mark: | `intercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,) for ovo, ndarray of shape (n_classes,) for ovr` | Constants in decision function. | +| :white_check_mark: | `n_features_in_ : int` | Number of features seen during `fit`. | +| :white_check_mark: | `feature_names_in_ : ndarray of shape (n_features_in_,)` | Names of features seen during `fit`. Only available if the data for `fit` is provided via a Pandas DataFrame and the column names are set. | +| :white_check_mark: | `n_iter_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)` for 'ovo' and ndarray of shape (n_classes,) for 'ovr' | Number of iterations run by the optimization routine to fit the model. The shape of this attribute depends on the number of models optimized which in turn depends on the number of classes and decision function. **Note**: for 'ovr' the values correspond to the number of CG iterations necessary for each right-hand side (i.e., class) to converge. | +| :white_check_mark: | `support_ : ndarray of shape (n_SV)` | Indices of support vectors. | +| :white_check_mark: | `support_vectors_ : ndarray of shape (n_SV, n_features)` | Support vectors. | +| :white_check_mark: | `n_support_ : ndarray of shape (n_classes,), dtype=int32` | Number of support vectors for each class. | +| :x: | `probA_ : ndarray of shape (n_classes * (n_classes - 1) / 2)` | Parameter learned in Platt scaling when `probability=True`.
| +| :x: | `probB_ : ndarray of shape (n_classes * (n_classes - 1) / 2)` | Parameter learned in Platt scaling when `probability=True`. | +| :white_check_mark: | `shape_fit_ : tuple of int of shape (n_dimensions_of_X,)` | Array dimensions of training vector `X`. | ### Methods @@ -90,7 +96,7 @@ by [`sklearn.svm.SVC`](https://scikit-learn.org/stable/modules/generated/sklearn | implementation status | method | sklearn description | |:---------------------:|-----------------------------------------|------------------------------------------------------------------------------------------------| -| :x: | `decision_function(X)` | Evaluate the decision function for the samples in X. | +| :white_check_mark: | `decision_function(X)` | Evaluate the decision function for the samples in X. | | :white_check_mark: | `fit(X, y[, sample_weight])` | Fit the SVM model according to the given training data. **Note**: without `sample_weight`. | | :x: | `get_metadata_routing()` | Get metadata routing of this object. | | :white_check_mark: | `get_params([deep])` | Get parameters for this estimator. | @@ -113,8 +119,8 @@ More detailed description of the class methods: - `fit(X, y[, sample_weight])`: Fit the SVM model according to the given training data. - Parameters: - - `X : array_like of shape (n_samples, n_features) or (n_samples, n_samples)`: Training vectors, - where `n_samples` is the number of samples and `n_features` is the number of features. + - `X : array_like of shape (n_samples, n_features)`: Training vectors, where `n_samples` is the number of + samples and `n_features` is the number of features. - `y : array-like of shape (n_samples,)`: Target values (class labels). - `sample_weight : array-like of shape (n_samples,), default=None`: Per-sample weights. Rescale C per sample. Higher weights force the classifier to put more emphasis on these points. 
**Note**: not supported @@ -181,22 +187,148 @@ More detailed description of the class methods: - Returns: - `self : object`: The updated object. +## Sklearn like API for `sklearn.svm.SVR` + +The following tables show the API provided +by [`sklearn.svm.SVR`](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html) and whether we currently +support the respective constructor parameter, class attribute, or method. +Note that the documentation is a verbose copy from the sklearn SVR page with some additional information added if our +implementation differs from the sklearn implementation. + +### Parameters + +The following parameters are supported +by [`sklearn.svm.SVR`](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html) when construction a +new `SVR`: + +| implementation status | parameter | sklearn description | +|:---------------------:|--------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| :white_check_mark: | `C : real_type, default=1.0` | Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. The penalty is a squared l2 penalty. | +| :white_check_mark: | `kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'laplacian', 'chi_squared'}, default='rbf'` | Specifies the kernel type to be used in the algorithm. If none is given, 'rbf' will be used. **Note**: 'precomputed' is not supported, but 'laplacian' and 'chi_squared' are supported in addition. | +| :white_check_mark: | `degree : int, default=3` | Degree of the polynomial kernel function (‘poly’). Must be non-negative. Ignored by all other kernels. 
| +| :white_check_mark: | `gamma : {'scale', 'auto'} or real_type, default='scale'` | Kernel coefficient for various kernel functions. **Note**: the default in PLSSVM is 'auto'. | +| :white_check_mark: | `coef0 : real_type, default=0.0` | Independent term in kernel function. It is only significant in 'poly' or 'sigmoid'. | +| :x: | `shrinking : bool, default=False` | Whether to use the shrinking heuristic. **Note**: not supported, therefore, the default is set to `False` | +| :white_check_mark: | `tol : real_type, default=1e-10` | Tolerance for stopping criterion. **Note**: in PLSSVM, this is equal to the (relative) epsilon used in the CG algorithm and, therefore, other values may be necessary than for the `sklearn.svm.SVR` implementation. | +| :x: | `cache_size : real_type, default=0` | Specify the size of the kernel cache (in MB). **Note**: not applicable in PLSSVM. | +| :white_check_mark: | `verbose : bool, default=False` | Enable verbose output. **Note**: if set to True, more information will be displayed than it would be the case with LIBSVM (and, therefore, `sklearn.svm.SVR`). | +| :white_check_mark: | `max_iter : int, default=-1` | Hard limit on iterations within solver, or -1 for no limit. **Note**: if -1 is provided, at most `#data_points - 1` many CG iterations are performed. | +| :x: | `epsilon : real_type, default=0.1` | The epsilon-tube within which no penalty is associated in the training loss function. **Note**: not applicable to PLSSVM's regression notation. | + +**Note**: the `plssvm.SVR` automatically uses the optimal (in the sense of performance) backend and target platform, as +they were made available during PLSSVM's build step.
+ +### Attributes + +The following attributes are supported +by [`sklearn.svm.SVR`](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html): + +| implementation status | attribute | sklearn description | +|:---------------------:|-----------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| :x: | `coef_ : ndarray of shape (n_classes * (n_classes - 1) / 2, n_features)` | Weights assigned to the features when `kernel="linear"`. | +| :x: | `dual_coef_ : ndarray of shape (n_classes -1, n_SV)` | Dual coefficients of the support vector in the decision function, multiplied by their targets. | +| :white_check_mark: | `fit_status_ : int` | 0 if correctly fitted, 1 otherwise (will raise warning). | +| :x: | `intercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)` | Constants in decision function. | +| :white_check_mark: | `n_features_in_ : int` | Number of features seen during `fit`. | +| :x: | `feature_names_in_ : ndarray of shape (n_features_in_,)` | Names of features seen during `fit`. | +| :white_check_mark: | `n_iter_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)` for 'ovo' and ndarray of shape (n_classes,) for 'ovr' | Number of iterations run by the optimization routine to fit the model. The shape of this attribute depends on the number of models optimized which in turn depends on the number of classes and decision function. **Note**: for 'ovr' the values correspond to the number of CG iterations necessary for each right-hand side (i.e., class) to converge. 
| +| :white_check_mark: | `support_ : ndarray of shape (n_SV)` | Indices of support vectors. | +| :white_check_mark: | `support_vectors_ : ndarray of shape (n_SV, n_features)` | Support vectors. | +| :white_check_mark: | `n_support_ : ndarray of shape (n_classes,), dtype=int32` | Number of support vectors for each class. | +| :x: | `probA_ : ndarray of shape (n_classes * (n_classes - 1) / 2)` | Parameter learned in Platt scaling when `probability=True`. | +| :x: | `probB_ : ndarray of shape (n_classes * (n_classes - 1) / 2)` | Parameter learned in Platt scaling when `probability=True`. | + +### Methods + +The following methods are supported +by [`sklearn.svm.SVR`](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html): + +| implementation status | method | sklearn description | +|:---------------------:|-----------------------------------------|------------------------------------------------------------------------------------------------| +| :white_check_mark: | `fit(X, y[, sample_weight])` | Fit the SVM model according to the given training data. **Note**: without `sample_weight`. | +| :x: | `get_metadata_routing()` | Get metadata routing of this object. | +| :white_check_mark: | `get_params([deep])` | Get parameters for this estimator. | +| :white_check_mark: | `predict(X)` | Perform regression on samples in X. | +| :white_check_mark: | `score(X, y[, sample_weight])` | Return the coefficient of determination of the prediction. **Note**: without `sample_weight`. | +| :x: | `set_fit_request(*[, sample_weight])` | Request metadata passed to the `fit` method. | +| :white_check_mark: | `set_params(**params)` | Set the parameters of this estimator. | +| :x: | `set_score_request(*[, sample_weight])` | Request metadata passed to the `score` method. | + +More detailed description of the class methods: + +- `fit(X, y[, sample_weight])`: Fit the SVM model according to the given training data. 
+ - Parameters: + - `X : array-like of shape (n_samples, n_features) or (n_samples, n_samples)`: Training vectors, + where `n_samples` is the number of samples and `n_features` is the number of features. + - `y : array-like of shape (n_samples,)`: Target values (real numbers). + - `sample_weight : array-like of shape (n_samples,), default=None`: Per-sample weights. Rescale C per sample. + Higher weights force the classifier to put more emphasis on these points. **Note**: not supported. + - Returns: + - `self : object`: Fitted estimator. + +- `get_metadata_routing()`: Get metadata routing of this object. + - Returns: + - `routing : MetadataRequest`: A MetadataRequest encapsulating routing information. + +- `get_params(deep=True)`: Get parameters for this estimator. + - Parameters: + - `deep : bool, default=True`: If True, will return the parameters for this estimator and contained sub-objects + that are estimators. **Note**: not applicable, therefore, ignored. + - Returns: + - `params : dict`: Parameter names mapped to their values. + +- `predict(X)`: Perform regression on samples in X. + - Parameters: + - `X : array-like of shape (n_samples, n_features)` + - Returns: + - `y_pred : ndarray of shape (n_samples,)`: Predicted values for samples in X. + +- `score(X, y, sample_weight=None)`: Return the coefficient of determination of the prediction. + - Parameters: + - `X : array-like of shape (n_samples, n_features)`: Test samples. + - `y : array-like of shape (n_samples,) or (n_samples, n_outputs)`: True values for X. + - `sample_weight : array-like of shape (n_samples,), default=None`: Sample weights. + - Returns: + - `score : float`: Coefficient of determination (R^2) of `self.predict(X)` w.r.t. `y`. + +- `set_fit_request(*, sample_weight: bool | None | str = "$UNCHANGED$") → SVR`: Request metadata passed to the fit method. 
+ - Parameters: + - `sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED`: Metadata + routing for `sample_weight` parameter in `fit`. + - Returns: + - `self : object`: The updated object. + +- `set_params(**params)`: Set the parameters of this estimator. + - Parameters: + - `**params : dict`: Estimator parameters. + - Returns: + - `self : object`: Estimator instance. + +- `set_score_request(*, sample_weight: bool | None | str = "$UNCHANGED$") → SVR`: Request metadata passed to the score + method. + - Parameters: + - `sample_weight : str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED`: Metadata routing + for `sample_weight` parameter in `score`. + - Returns: + - `self : object`: The updated object. + ## Bindings close to our C++ API ### Enumerations The following table lists all PLSSVM enumerations exposed on the Python side: -| enumeration | values | description | -|------------------------|-------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `TargetPlatform` | `AUTOMATIC`, `CPU`, `GPU_NVIDIA`, `GPU_AMD`, `GPU_INTEL` | The different supported target platforms (default: `AUTOMATIC`). If `AUTOMATIC` is provided, checks for available devices in the following order: NVIDIA GPUs -> AMD GPUs -> Intel GPUs -> CPUs. | -| `SolverType` | `AUTOMATIC`, `CG_EXPLICIT`, `CG_IMPLICIT` | The different supported solver types (default: `AUTOMATIC`). If `AUTOMATIC` is provided, the used solver types depends on the available device and system memory. | -| `KernelFunctionType` | `LINEAR`, `POLYNOMIAL`, `RBF`, `SIGMOID`, `LAPLACIAN`, `CHI_SQUARED` | The different supported kernel functions (default: `LINEAR`). 
| -| `FileFormatType` | `LIBSVM`, `ARFF` | The different supported file format types (default: `LIBSVM`). | -| `GammaCoefficientType` | `AUTOMATIC`, `SCALE` | The different modes for the dynamic gamma calculation (default: `AUTOMATIC`). | -| `ClassificationType` | `OAA`, `OAO` | The different supported multi-class classification strategies (default: `LIBSVM`). | -| `BackendType` | `AUTOMATIC`, `OPENMP`, `HPX`, `CUDA`, `HIP`, `OPENCL`, `SYCL`, `KOKKOS` | The different supported backends (default: `AUTOMATIC`). If `AUTOMATIC` is provided, the selected backend depends on the used target platform. | -| `VerbosityLevel` | `QUIET`, `LIBSVM`, `TIMING`, `FULL` | The different supported log levels (default: `FULL`). `QUIET` means no output, `LIBSVM` output that is as conformant as possible with LIBSVM's output, `TIMING` all timing related outputs, and `FULL` everything. Can be combined via bit-wise operations. | +| enumeration | values | description | +|------------------------|----------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `TargetPlatform` | `AUTOMATIC`, `CPU`, `GPU_NVIDIA`, `GPU_AMD`, `GPU_INTEL` | The different supported target platforms (default: `AUTOMATIC`). If `AUTOMATIC` is provided, checks for available devices in the following order: NVIDIA GPUs -> AMD GPUs -> Intel GPUs -> CPUs. | +| `SolverType` | `AUTOMATIC`, `CG_EXPLICIT`, `CG_IMPLICIT` | The different supported solver types (default: `AUTOMATIC`). If `AUTOMATIC` is provided, the used solver types depends on the available device and system memory. | +| `KernelFunctionType` | `LINEAR`, `POLYNOMIAL`, `RBF`, `SIGMOID`, `LAPLACIAN`, `CHI_SQUARED` | The different supported kernel functions (default: `RBF`). 
| +| `FileFormatType` | `LIBSVM`, `ARFF` | The different supported file format types (default: `LIBSVM`). | +| `GammaCoefficientType` | `AUTOMATIC`, `SCALE` | The different modes for the dynamic gamma calculation (default: `AUTOMATIC`). | +| `ClassificationType` | `OAA`, `OAO` | The different supported multi-class classification strategies (default: `LIBSVM`). | +| `BackendType` | `AUTOMATIC`, `OPENMP`, `HPX`, `STDPAR`, `CUDA`, `HIP`, `OPENCL`, `SYCL`, `KOKKOS` | The different supported backends (default: `AUTOMATIC`). If `AUTOMATIC` is provided, the selected backend depends on the used target platform. | +| `VerbosityLevel` | `QUIET`, `LIBSVM`, `TIMING`, `FULL` | The different supported log levels (default: `FULL`). `QUIET` means no output, `LIBSVM` output that is as conformant as possible with LIBSVM's output, `TIMING` all timing related outputs, and `FULL` everything. Can be combined via bit-wise operations. | +| `SVMType` | `CSVC`, `CSVR` | The different supported C-SVM types. | If a SYCL implementation is available, additional enumerations are available: @@ -225,15 +357,15 @@ The following tables list all PLSSVM classes exposed on the Python side: The parameter class encapsulates all necessary hyperparameters needed to fit an SVM. -| constructors | description | -|---------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------| -| `Parameter()` | Default construct a parameter object. | -| `Parameter(kernel_type, degree, gamma, coef0, cost)` | Construct a parameter object by explicitly providing each hyper-parameter value. | -| `Parameter([kernel_type=KernelFunctionType.LINEAR, degree=3, gamma=*1/#features*, coef=0.0, cost=1.0])` | Construct a parameter object with the provided named parameters. 
| +| constructors | description | +|------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------| +| `Parameter()` | Default construct a parameter object. | +| `Parameter(kernel_type, degree, gamma, coef0, cost)` | Construct a parameter object by explicitly providing each hyper-parameter value. | +| `Parameter([kernel_type=KernelFunctionType.RBF, degree=3, gamma=*1/#features*, coef=0.0, cost=1.0])` | Construct a parameter object with the provided named parameters. | | attributes | description | |------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `kernel_type : KernelFunctionType` | The used kernel function type (default: `LINEAR`). | +| `kernel_type : KernelFunctionType` | The used kernel function type (default: `RBF`). | | `degree : int` | The used degree in the polynomial kernel function (default: `3`). | | `gamma : gamma_type` | The used gamma in the different kernel functions (default: `AUTOMATIC`). 
The `gamma_type` is a `std::variant 0 | | `rbf_kernel_function(x, y, gamma)` | Calculate the radial basis function kernel function of two vectors: exp(-gamma*\|x-y\|^2), with gamma > 0 | | `sigmoid_kernel_function(x, y, gamma, coef0)` | Calculate the sigmoid kernel function of two vectors: tanh(gamma*x'*y), with gamma > 0 | -| `laplacian_kernel_function(x, y, gamma)` | Calculate the sigmoid kernel function of two vectors: exp(-gamma*\|x-y\|_1), with gamma > 0 | -| `chi_squared_kernel_function(x, y, gamma)` | Calculate the sigmoid kernel function of two vectors: exp(-gamma*sum_i((x[i] - y[i])^2) / (x[i] + y[i])), with gamma > 0 | +| `laplacian_kernel_function(x, y, gamma)` | Calculate the laplacian kernel function of two vectors: exp(-gamma*\|x-y\|_1), with gamma > 0 | +| `chi_squared_kernel_function(x, y, gamma)` | Calculate the chi-squared kernel function of two vectors: exp(-gamma*sum_i((x[i] - y[i])^2) / (x[i] + y[i])), with gamma > 0 | | `kernel_function(x, y, params)` | Calculate the kernel function provided in params with the additional parameters also provided in params. | | `classification_type_to_full_string(classification)` | Returns the full string of the provided classification type, i.e., "one vs. all" and "one vs. one" instead of only "oaa" or "oao". | | `calculate_number_of_classifiers(classification, num_classes)` | Return the number of necessary classifiers in a multi-class setting with the provided classification strategy and number of different classes. | @@ -520,6 +660,10 @@ The following table lists all free functions in PLSSVM directly callable via `pl | `equivalent(params1, params2)` | Check whether the two parameter classes are equivalent, i.e., the parameters for **the current kernel function** are identical. E.g., for the rbf kernel function the gamma values must be identical, but the degree values can be different, since degree isn't used in the rbf kernel function. 
| | `get_gamma_string(gamma)` | Returns the gamma string based on the active member in the `gamma_type` `std::variant`. | | `calculate_gamma_value(gamma, matrix)` | Calculate the value of gamma based on the active member in the `gamma_type` `std::variant`. | +| `list_available_svm_types()` | List all available SVM types (C-SVC or C-SVR). | +| `svm_type_to_task_name(svm_type)` | Returns the task name (classification or regression) associated with the provided SVM type. | +| `svm_type_from_model_file(model_file)` | Returns the SVM type used to create the provided model file. | +| `regression_report(y_true, y_pred, [force_finite, output_dict])` | Returns a regression report similar to sklearn's [`metrics.classification_report`](https://scikit-learn.org/0.15/modules/generated/sklearn.metrics.classification_report.html) for the regression task. If `output_dict` is `True`, returns a Python dictionary, otherwise directly returns a string. | If a SYCL implementation is available, additional free functions are available: @@ -537,19 +681,22 @@ If a stdpar implementation is available, additional free functions are available The PLSSVM Python3 bindings define a few new exception types: -| exception | description | -|------------------------------|------------------------------------------------------------------------------------------------------------------------| -| `PLSSVMError` | Base class of all other PLSSVM specific exceptions. | -| `InvalidParameterError` | If an invalid hyper-parameter has been provided in the `plssvm.Parameter` class. | -| `FileReaderError` | If something went wrong while reading the requested file (possibly using memory mapped IO.) | -| `DataSetError` | If something related to the `plssvm.DataSet` class(es) went wrong, e.g., wrong arguments provided to the constructors. | -| `FileNotFoundError` | If the requested data or model file couldn't be found. 
| -| `InvalidFileFormatError` | If the requested data or model file are invalid, e.g., wrong LIBSVM model header. | -| `UnsupportedBackendError` | If an unsupported backend has been requested. | -| `UnsupportedKernelTypeError` | If an unsupported target platform has been requested. | -| `GPUDevicePtrError` | If something went wrong in one of the backend's GPU device pointers. **Note**: shouldn't occur in user code. | -| `MatrixError` | If something went wrong in the internal matrix class. **Note**: shouldn't occur in user code. | -| `KernelLaunchResourcesError` | If something went wrong during a kernel launch due to insufficient ressources. | -| `ClassificationReportError` | If something in the classification report went wrong. **Note**: shouldn't occur in user code. | +| exception | description | +|------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `PLSSVMError` | Base class of all other PLSSVM specific exceptions. | +| `InvalidParameterError` | If an invalid hyper-parameter has been provided in the `plssvm.Parameter` class. | +| `FileReaderError` | If something went wrong while reading the requested file (possibly using memory mapped IO). | +| `DataSetError` | If something related to the `plssvm.ClassificationDataSet`/`plssvm.RegressionDataSet` class(es) went wrong, e.g., wrong arguments provided to the constructors. | +| `MinMaxScalerError` | If something related to the `plssvm.MinMaxScaler` went wrong, e.g., scaling wasn't successful. | +| `FileNotFoundError` | If the requested data or model file couldn't be found. | +| `InvalidFileFormatError` | If the requested data or model file is invalid, e.g., wrong LIBSVM model header. | +| `UnsupportedBackendError` | If an unsupported backend has been requested. | +| `UnsupportedKernelTypeError` | If an unsupported kernel type has been requested. 
| +| `GPUDevicePtrError` | If something went wrong in one of the backend's GPU device pointers. **Note**: shouldn't occur in user code. | +| `MatrixError` | If something went wrong in the internal matrix class. **Note**: shouldn't occur in user code. | +| `KernelLaunchResourcesError` | If something went wrong during a kernel launch due to insufficient resources. | +| `ClassificationReportError` | If something in the classification report went wrong. **Note**: shouldn't occur in user code. | +| `RegressionReportError` | If something in the regression report went wrong. **Note**: shouldn't occur in user code. | +| `EnvironmentError` | If something during the special environment initialization or finalization went wrong. | Depending on the available backends, additional `BackendError`s are also available (e.g., `plssvm.cuda.BackendError`). diff --git a/bindings/Python/__cli__.py b/bindings/Python/__cli__.py new file mode 100644 index 000000000..f64ec44d3 --- /dev/null +++ b/bindings/Python/__cli__.py @@ -0,0 +1,21 @@ +import sys +import subprocess +from pathlib import Path + + +# support for plssvm-train including command line arguments +def train(): + exe_path = Path(__file__).parent / "plssvm-train" + subprocess.run([str(exe_path)] + sys.argv[1:]) + + +# support for plssvm-predict including command line arguments +def predict(): + exe_path = Path(__file__).parent / "plssvm-predict" + subprocess.run([str(exe_path)] + sys.argv[1:]) + + +# support for plssvm-scale including command line arguments +def scale(): + exe_path = Path(__file__).parent / "plssvm-scale" + subprocess.run([str(exe_path)] + sys.argv[1:]) diff --git a/bindings/Python/__init__.py b/bindings/Python/__init__.py new file mode 100644 index 000000000..5976a4f2a --- /dev/null +++ b/bindings/Python/__init__.py @@ -0,0 +1,8 @@ +# import the plssvm module explicitly +from . 
import plssvm +# export everything +from .plssvm import * # noqa: F405 + +# explicitly set the module level attributes +__doc__ = plssvm.__doc__ +__version__ = plssvm.__version__ diff --git a/bindings/Python/__install_check__.py b/bindings/Python/__install_check__.py new file mode 100644 index 000000000..eedfd897f --- /dev/null +++ b/bindings/Python/__install_check__.py @@ -0,0 +1,33 @@ +import plssvm + +# print information regarding the current installation after an installation via pip +def check(): + print("{} ({})".format(plssvm.__doc__, plssvm.__version__)) + print() + + print("Copyright(C) 2018-today The PLSSVM project - All Rights Reserved") + print("This is free software distributed under the MIT license.") + print() + + print("Available target platforms: {}".format(', '.join(str(target) for target in plssvm.list_available_target_platforms()))) + print("Default target platform: {}\n".format(str(plssvm.determine_default_target_platform()))) + + print("Available backends: {}".format(', '.join(str(backend) for backend in plssvm.list_available_backends()))) + for target in plssvm.list_available_target_platforms(): + if target == plssvm.TargetPlatform.AUTOMATIC: + continue + try: + backend = plssvm.determine_default_backend(available_target_platforms=[target]) + print("Default backend for target platform {}: {}".format(str(target), str(backend))) + except Exception: + pass + print() + + if plssvm.BackendType.SYCL in plssvm.list_available_backends(): + print("Available SYCL implementations: {}".format(', '.join(str(impl) for impl in plssvm.sycl.list_available_sycl_implementations()))) + print() + + print() + print("Repository: https://github.com/SC-SGS/PLSSVM.git") + print("Documentation: https://sc-sgs.github.io/PLSSVM/") + print("Issues: https://github.com/SC-SGS/PLSSVM/issues") \ No newline at end of file diff --git a/bindings/Python/backend_types.cpp b/bindings/Python/backend_types.cpp index 5664cf360..7997991b9 100644 --- a/bindings/Python/backend_types.cpp 
+++ b/bindings/Python/backend_types.cpp @@ -18,7 +18,7 @@ namespace py = pybind11; void init_backend_types(py::module_ &m) { // bind enum class - py::enum_(m, "BackendType") + py::enum_(m, "BackendType", "Enum class for all possible backend types, all different SYCL implementations have the same backend type \"sycl\".") .value("AUTOMATIC", plssvm::backend_type::automatic, "the default backend; depends on the specified target platform") .value("OPENMP", plssvm::backend_type::openmp, "OpenMP to target CPUs only (currently no OpenMP target offloading support)") .value("HPX", plssvm::backend_type::hpx, "HPX to target CPUs only (currently no GPU executor support)") diff --git a/bindings/Python/backends/adaptivecpp_csvm.cpp b/bindings/Python/backends/adaptivecpp_csvm.cpp index bf81b11ae..a02dd0b65 100644 --- a/bindings/Python/backends/adaptivecpp_csvm.cpp +++ b/bindings/Python/backends/adaptivecpp_csvm.cpp @@ -6,59 +6,104 @@ * See the LICENSE.md file in the project root for full license information. 
*/ +#include "plssvm/backend_types.hpp" // plssvm::adaptivecpp::backend_csvm_type_t #include "plssvm/backends/SYCL/AdaptiveCpp/csvm.hpp" // plssvm::adaptivecpp::csvm #include "plssvm/backends/SYCL/exceptions.hpp" // plssvm::adaptivecpp::backend_exception #include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type -#include "plssvm/csvm.hpp" // plssvm::csvm #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception -#include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::sycl_kernel_invocation_type +#include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/svm/csvc.hpp" // plssvm::csvc +#include "plssvm/svm/csvm.hpp" // plssvm::csvm +#include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/utility.hpp" // check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception} -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init -#include "pybind11/stl.h" // support for STL types +#include "fmt/format.h" // fmt::format +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::exception +#include "pybind11/pytypes.h" // py::kwargs #include // std::make_unique +#include // std::string namespace py = pybind11; -py::module_ init_adaptivecpp_csvm(py::module_ &m, const py::exception &base_exception) { - // use its own submodule for the AdaptiveCpp CSVM bindings - py::module_ adaptivecpp_module = m.def_submodule("adaptivecpp", "a module containing all AdaptiveCpp SYCL backend specific functionality"); +namespace { + +template +void bind_adaptivecpp_csvms(py::module_ &m, const std::string &csvm_name) { + using backend_csvm_type = plssvm::adaptivecpp::backend_csvm_type_t; - // bind the CSVM using the AdaptiveCpp backend - 
py::class_(adaptivecpp_module, "CSVM") - .def(py::init<>(), "create an SVM with the automatic target platform and default parameter object") - .def(py::init(), "create an SVM with the automatic target platform and provided parameter object") - .def(py::init(), "create an SVM with the provided target platform and default parameter object") - .def(py::init(), "create an SVM with the provided target platform and parameter object") + // assemble docstrings + const std::string class_docstring{ fmt::format("A {} using the AdaptiveCpp SYCL backend.", csvm_name) }; + const std::string param_docstring{ fmt::format("create an AdaptiveCpp SYCL {} with the provided parameters and optional SYCL specific keyword arguments", csvm_name) }; + const std::string target_param_docstring{ fmt::format("create an AdaptiveCpp SYCL {} with the provided target platform, parameters, and optional SYCL specific keyword arguments", csvm_name) }; + const std::string kwargs_docstring{ fmt::format("create an AdaptiveCpp SYCL {} with the provided keyword arguments (including optional SYCL specific keyword arguments)", csvm_name) }; + const std::string target_kwargs_docstring{ fmt::format("create an AdaptiveCpp SYCL {} with the provided target platform and keyword arguments (including optional SYCL specific keyword arguments)", csvm_name) }; + + py::class_(m, csvm_name.c_str(), class_docstring.c_str()) + .def(py::init([](const plssvm::parameter params, const py::kwargs &args) { + // check for valid keys + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "sycl_kernel_invocation_type" }); + // set SYCL kernel invocation type + const plssvm::sycl::kernel_invocation_type invocation = args.contains("sycl_kernel_invocation_type") ? 
args["sycl_kernel_invocation_type"].cast() : plssvm::sycl::kernel_invocation_type::automatic; + // create C-SVM with the default target platform + return std::make_unique(params, plssvm::sycl_kernel_invocation_type = invocation); + }), + param_docstring.c_str()) + .def(py::init([](const plssvm::target_platform target, const plssvm::parameter params, const py::kwargs &args) { + // check for valid keys + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "sycl_kernel_invocation_type" }); + // set SYCL kernel invocation type + const plssvm::sycl::kernel_invocation_type invocation = args.contains("sycl_kernel_invocation_type") ? args["sycl_kernel_invocation_type"].cast() : plssvm::sycl::kernel_invocation_type::automatic; + // create C-SVM with the provided target platform + return std::make_unique(target, params, plssvm::sycl_kernel_invocation_type = invocation); + }), + target_param_docstring.c_str()) .def(py::init([](const py::kwargs &args) { // check for valid keys - check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "sycl_kernel_invocation_type" }); + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "sycl_kernel_invocation_type" }); // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = convert_kwargs_to_parameter(args); + const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); // set SYCL kernel invocation type - const plssvm::sycl::kernel_invocation_type invoc = args.contains("sycl_kernel_invocation_type") ? args["sycl_kernel_invocation_type"].cast() : plssvm::sycl::kernel_invocation_type::automatic; - // create CSVM with the default target platform - return std::make_unique(params, plssvm::sycl_kernel_invocation_type = invoc); + const plssvm::sycl::kernel_invocation_type invocation = args.contains("sycl_kernel_invocation_type") ? 
args["sycl_kernel_invocation_type"].cast() : plssvm::sycl::kernel_invocation_type::automatic; + // create C-SVM with the default target platform + return std::make_unique(params, plssvm::sycl_kernel_invocation_type = invocation); }), - "create an SVM with the default target platform and keyword arguments") + kwargs_docstring.c_str()) .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { // check for valid keys - check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "sycl_kernel_invocation_type" }); + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "sycl_kernel_invocation_type" }); // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = convert_kwargs_to_parameter(args); + const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); // set SYCL kernel invocation type - const plssvm::sycl::kernel_invocation_type invoc = args.contains("sycl_kernel_invocation_type") ? args["sycl_kernel_invocation_type"].cast() : plssvm::sycl::kernel_invocation_type::automatic; - // create CSVM with the default target platform - return std::make_unique(target, params, plssvm::sycl_kernel_invocation_type = invoc); + const plssvm::sycl::kernel_invocation_type invocation = args.contains("sycl_kernel_invocation_type") ? 
args["sycl_kernel_invocation_type"].cast() : plssvm::sycl::kernel_invocation_type::automatic; + // create C-SVM with the provided target platform + return std::make_unique(target, params, plssvm::sycl_kernel_invocation_type = invocation); }), - "create an SVM with the provided target platform and keyword arguments") - .def("get_kernel_invocation_type", &plssvm::adaptivecpp::csvm::get_kernel_invocation_type, "get the kernel invocation type used in this SYCL SVM"); + target_kwargs_docstring.c_str()) + .def("get_kernel_invocation_type", &plssvm::adaptivecpp::csvm::get_kernel_invocation_type, "get the kernel invocation type used in this SYCL C-SVM") + .def("__repr__", [csvm_name](const backend_csvm_type &self) { + return fmt::format("", csvm_name, self.num_available_devices(), self.get_kernel_invocation_type()); + }); +} + +} // namespace + +py::module_ init_adaptivecpp_csvm(py::module_ &m, const py::exception &base_exception) { + // use its own submodule for the AdaptiveCpp C-SVM bindings + py::module_ adaptivecpp_module = m.def_submodule("adaptivecpp", "a module containing all AdaptiveCpp backend specific functionality"); + const py::module_ adaptivecpp_pure_virtual_module = adaptivecpp_module.def_submodule("__pure_virtual", "a module containing all pure-virtual AdaptiveCpp backend specific functionality"); + + // bind the pure-virtual base AdaptiveCpp C-SVM + [[maybe_unused]] const py::class_ virtual_base_adaptivecpp_csvm(adaptivecpp_pure_virtual_module, "__pure_virtual_adaptivecpp_base_CSVM"); + + // bind the specific AdaptiveCpp C-SVC and C-SVR classes + bind_adaptivecpp_csvms(adaptivecpp_module, "CSVC"); + bind_adaptivecpp_csvms(adaptivecpp_module, "CSVR"); // register AdaptiveCpp backend specific exceptions - register_py_exception(adaptivecpp_module, "BackendError", base_exception); + plssvm::bindings::python::util::register_py_exception(adaptivecpp_module, "BackendError", base_exception); return adaptivecpp_module; } diff --git 
a/bindings/Python/backends/cuda_csvm.cpp b/bindings/Python/backends/cuda_csvm.cpp index adc9ce306..cb7867d20 100644 --- a/bindings/Python/backends/cuda_csvm.cpp +++ b/bindings/Python/backends/cuda_csvm.cpp @@ -6,51 +6,80 @@ * See the LICENSE.md file in the project root for full license information. */ +#include "plssvm/backend_types.hpp" // plssvm::cuda::backend_csvm_type_t #include "plssvm/backends/CUDA/csvm.hpp" // plssvm::cuda::csvm #include "plssvm/backends/CUDA/exceptions.hpp" // plssvm::cuda::backend_exception -#include "plssvm/csvm.hpp" // plssvm::csvm #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception #include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/svm/csvc.hpp" // plssvm::csvc +#include "plssvm/svm/csvm.hpp" // plssvm::csvm +#include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/utility.hpp" // check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception} -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init -#include "pybind11/stl.h" // support for STL types +#include "fmt/format.h" // fmt::format +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::exception +#include "pybind11/pytypes.h" // py::kwargs #include // std::make_unique +#include // std::string namespace py = pybind11; -void init_cuda_csvm(py::module_ &m, const py::exception &base_exception) { - // use its own submodule for the CUDA CSVM bindings - py::module_ cuda_module = m.def_submodule("cuda", "a module containing all CUDA backend specific functionality"); +namespace { + +template +void bind_cuda_csvms(py::module_ &m, const std::string &csvm_name) { + using backend_csvm_type = plssvm::cuda::backend_csvm_type_t; - // bind the CSVM using the CUDA backend - 
py::class_(cuda_module, "CSVM") - .def(py::init<>(), "create an SVM with the automatic target platform and default parameter object") - .def(py::init(), "create an SVM with the automatic target platform and provided parameter object") - .def(py::init(), "create an SVM with the provided target platform and default parameter object") - .def(py::init(), "create an SVM with the provided target platform and parameter object") + // assemble docstrings + const std::string class_docstring{ fmt::format("A {} using the CUDA backend.", csvm_name) }; + const std::string param_docstring{ fmt::format("create a CUDA {} with the provided parameters", csvm_name) }; + const std::string target_param_docstring{ fmt::format("create a CUDA {} with the provided target platform and parameters", csvm_name) }; + const std::string kwargs_docstring{ fmt::format("create a CUDA {} with the provided keyword arguments", csvm_name) }; + const std::string target_kwargs_docstring{ fmt::format("create a CUDA {} with the provided target platform and keyword arguments", csvm_name) }; + + py::class_(m, csvm_name.c_str(), class_docstring.c_str()) + .def(py::init(), param_docstring.c_str()) + .def(py::init(), target_param_docstring.c_str()) .def(py::init([](const py::kwargs &args) { // check for valid keys - check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = convert_kwargs_to_parameter(args); - // create CSVM with the default target platform - return std::make_unique(params); + const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); + // create C-SVM with the default target platform + return std::make_unique(params); }), - "create an SVM with the default target platform and keyword 
arguments") + kwargs_docstring.c_str()) .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { // check for valid keys - check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = convert_kwargs_to_parameter(args); - // create CSVM with the provided target platform - return std::make_unique(target, params); + const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); + // create C-SVM with the provided target platform + return std::make_unique(target, params); }), - "create an SVM with the provided target platform and keyword arguments"); + target_kwargs_docstring.c_str()) + .def("__repr__", [csvm_name](const backend_csvm_type &self) { + return fmt::format("", csvm_name, self.num_available_devices()); + }); +} + +} // namespace + +void init_cuda_csvm(py::module_ &m, const py::exception &base_exception) { + // use its own submodule for the CUDA C-SVM bindings + py::module_ cuda_module = m.def_submodule("cuda", "a module containing all CUDA backend specific functionality"); + const py::module_ cuda_pure_virtual_module = cuda_module.def_submodule("__pure_virtual", "a module containing all pure-virtual CUDA backend specific functionality"); + + // bind the pure-virtual base CUDA C-SVM + [[maybe_unused]] const py::class_ virtual_base_cuda_csvm(cuda_pure_virtual_module, "__pure_virtual_cuda_base_CSVM"); + + // bind the specific CUDA C-SVC and C-SVR classes + bind_cuda_csvms(cuda_module, "CSVC"); + bind_cuda_csvms(cuda_module, "CSVR"); // register CUDA backend specific exceptions - register_py_exception(cuda_module, "BackendError", base_exception); + plssvm::bindings::python::util::register_py_exception(cuda_module, "BackendError", 
base_exception); } diff --git a/bindings/Python/backends/dpcpp_csvm.cpp b/bindings/Python/backends/dpcpp_csvm.cpp index 906cb5979..b895bdeef 100644 --- a/bindings/Python/backends/dpcpp_csvm.cpp +++ b/bindings/Python/backends/dpcpp_csvm.cpp @@ -6,59 +6,104 @@ * See the LICENSE.md file in the project root for full license information. */ +#include "plssvm/backend_types.hpp" // plssvm::dpcpp::backend_csvm_type_t #include "plssvm/backends/SYCL/DPCPP/csvm.hpp" // plssvm::dpcpp::csvm #include "plssvm/backends/SYCL/exceptions.hpp" // plssvm::dpcpp::backend_exception #include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type -#include "plssvm/csvm.hpp" // plssvm::csvm #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception -#include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::sycl_kernel_invocation_type +#include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/svm/csvc.hpp" // plssvm::csvc +#include "plssvm/svm/csvm.hpp" // plssvm::csvm +#include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/utility.hpp" // check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception} -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init -#include "pybind11/stl.h" // support for STL types +#include "fmt/format.h" // fmt::format +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::exception +#include "pybind11/pytypes.h" // py::kwargs #include // std::make_unique +#include // std::string namespace py = pybind11; -py::module_ init_dpcpp_csvm(py::module_ &m, const py::exception &base_exception) { - // use its own submodule for the DPCPP CSVM bindings - py::module_ dpcpp_module = m.def_submodule("dpcpp", "a module containing all 
DPC++ SYCL backend specific functionality"); +namespace { + +template +void bind_dpcpp_csvms(py::module_ &m, const std::string &csvm_name) { + using backend_csvm_type = plssvm::dpcpp::backend_csvm_type_t; - // bind the CSVM using the DPCPP backend - py::class_(dpcpp_module, "CSVM") - .def(py::init<>(), "create an SVM with the automatic target platform and default parameter object") - .def(py::init(), "create an SVM with the automatic target platform and provided parameter object") - .def(py::init(), "create an SVM with the provided target platform and default parameter object") - .def(py::init(), "create an SVM with the provided target platform and parameter object") + // assemble docstrings + const std::string class_docstring{ fmt::format("A {} using the DPC++ SYCL backend.", csvm_name) }; + const std::string param_docstring{ fmt::format("create a DPC++ SYCL {} with the provided parameters and optional SYCL specific keyword arguments", csvm_name) }; + const std::string target_param_docstring{ fmt::format("create a DPC++ SYCL {} with the provided target platform, parameters, and optional SYCL specific keyword arguments", csvm_name) }; + const std::string kwargs_docstring{ fmt::format("create a DPC++ SYCL {} with the provided keyword arguments (including optional SYCL specific keyword arguments)", csvm_name) }; + const std::string target_kwargs_docstring{ fmt::format("create a DPC++ SYCL {} with the provided target platform and keyword arguments (including optional SYCL specific keyword arguments)", csvm_name) }; + + py::class_(m, csvm_name.c_str(), class_docstring.c_str()) + .def(py::init([](const plssvm::parameter params, const py::kwargs &args) { + // check for valid keys + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "sycl_kernel_invocation_type" }); + // set SYCL kernel invocation type + const plssvm::sycl::kernel_invocation_type invocation = args.contains("sycl_kernel_invocation_type") ? 
args["sycl_kernel_invocation_type"].cast() : plssvm::sycl::kernel_invocation_type::automatic; + // create C-SVM with the default target platform + return std::make_unique(params, plssvm::sycl_kernel_invocation_type = invocation); + }), + param_docstring.c_str()) + .def(py::init([](const plssvm::target_platform target, const plssvm::parameter params, const py::kwargs &args) { + // check for valid keys + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "sycl_kernel_invocation_type" }); + // set SYCL kernel invocation type + const plssvm::sycl::kernel_invocation_type invocation = args.contains("sycl_kernel_invocation_type") ? args["sycl_kernel_invocation_type"].cast() : plssvm::sycl::kernel_invocation_type::automatic; + // create C-SVM with the provided target platform + return std::make_unique(target, params, plssvm::sycl_kernel_invocation_type = invocation); + }), + target_param_docstring.c_str()) .def(py::init([](const py::kwargs &args) { // check for valid keys - check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "sycl_kernel_invocation_type" }); + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "sycl_kernel_invocation_type" }); // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = convert_kwargs_to_parameter(args); + const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); // set SYCL kernel invocation type - const plssvm::sycl::kernel_invocation_type invoc = args.contains("sycl_kernel_invocation_type") ? args["sycl_kernel_invocation_type"].cast() : plssvm::sycl::kernel_invocation_type::automatic; - // create CSVM with the default target platform - return std::make_unique(params, plssvm::sycl_kernel_invocation_type = invoc); + const plssvm::sycl::kernel_invocation_type invocation = args.contains("sycl_kernel_invocation_type") ? 
args["sycl_kernel_invocation_type"].cast() : plssvm::sycl::kernel_invocation_type::automatic; + // create C-SVM with the default target platform + return std::make_unique(params, plssvm::sycl_kernel_invocation_type = invocation); }), - "create an SVM with the default target platform and keyword arguments") + kwargs_docstring.c_str()) .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { // check for valid keys - check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "sycl_kernel_invocation_type" }); + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "sycl_kernel_invocation_type" }); // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = convert_kwargs_to_parameter(args); + const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); // set SYCL kernel invocation type - const plssvm::sycl::kernel_invocation_type invoc = args.contains("sycl_kernel_invocation_type") ? args["sycl_kernel_invocation_type"].cast() : plssvm::sycl::kernel_invocation_type::automatic; - // create CSVM with the default target platform - return std::make_unique(target, params, plssvm::sycl_kernel_invocation_type = invoc); + const plssvm::sycl::kernel_invocation_type invocation = args.contains("sycl_kernel_invocation_type") ? 
args["sycl_kernel_invocation_type"].cast() : plssvm::sycl::kernel_invocation_type::automatic; + // create C-SVM with the provided target platform + return std::make_unique(target, params, plssvm::sycl_kernel_invocation_type = invocation); }), - "create an SVM with the provided target platform and keyword arguments") - .def("get_kernel_invocation_type", &plssvm::dpcpp::csvm::get_kernel_invocation_type, "get the kernel invocation type used in this SYCL SVM"); + target_kwargs_docstring.c_str()) + .def("get_kernel_invocation_type", &plssvm::dpcpp::csvm::get_kernel_invocation_type, "get the kernel invocation type used in this SYCL C-SVM") + .def("__repr__", [csvm_name](const backend_csvm_type &self) { + return fmt::format("", csvm_name, self.num_available_devices(), self.get_kernel_invocation_type()); + }); +} + +} // namespace + +py::module_ init_dpcpp_csvm(py::module_ &m, const py::exception &base_exception) { + // use its own submodule for the DPC++ C-SVM bindings + py::module_ dpcpp_module = m.def_submodule("dpcpp", "a module containing all DPC++ backend specific functionality"); + const py::module_ dpcpp_pure_virtual_module = dpcpp_module.def_submodule("__pure_virtual", "a module containing all pure-virtual DPC++ backend specific functionality"); + + // bind the pure-virtual base DPC++ C-SVM + [[maybe_unused]] const py::class_ virtual_base_dpcpp_csvm(dpcpp_pure_virtual_module, "__pure_virtual_dpcpp_base_CSVM"); + + // bind the specific DPC++ C-SVC and C-SVR classes + bind_dpcpp_csvms(dpcpp_module, "CSVC"); + bind_dpcpp_csvms(dpcpp_module, "CSVR"); - // register DPCPP backend specific exceptions - register_py_exception(dpcpp_module, "BackendError", base_exception); + // register DPC++ backend specific exceptions + plssvm::bindings::python::util::register_py_exception(dpcpp_module, "BackendError", base_exception); return dpcpp_module; } diff --git a/bindings/Python/backends/hip_csvm.cpp b/bindings/Python/backends/hip_csvm.cpp index 9647f8517..fc05c13d4 100644 --- 
a/bindings/Python/backends/hip_csvm.cpp +++ b/bindings/Python/backends/hip_csvm.cpp @@ -6,51 +6,80 @@ * See the LICENSE.md file in the project root for full license information. */ +#include "plssvm/backend_types.hpp" // plssvm::hip::backend_csvm_type_t #include "plssvm/backends/HIP/csvm.hpp" // plssvm::hip::csvm #include "plssvm/backends/HIP/exceptions.hpp" // plssvm::hip::backend_exception -#include "plssvm/csvm.hpp" // plssvm::csvm #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception #include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/svm/csvc.hpp" // plssvm::csvc +#include "plssvm/svm/csvm.hpp" // plssvm::csvm +#include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/utility.hpp" // check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception} -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init -#include "pybind11/stl.h" // support for STL types +#include "fmt/format.h" // fmt::format +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::exception +#include "pybind11/pytypes.h" // py::kwargs #include // std::make_unique +#include // std::string namespace py = pybind11; -void init_hip_csvm(py::module_ &m, const py::exception &base_exception) { - // use its own submodule for the HIP CSVM bindings - py::module_ hip_module = m.def_submodule("hip", "a module containing all HIP backend specific functionality"); +namespace { + +template +void bind_hip_csvms(py::module_ &m, const std::string &csvm_name) { + using backend_csvm_type = plssvm::hip::backend_csvm_type_t; - // bind the CSVM using the HIP backend - py::class_(hip_module, "CSVM") - .def(py::init<>(), "create an SVM with the automatic target platform and default parameter object") - 
.def(py::init(), "create an SVM with the automatic target platform and provided parameter object") - .def(py::init(), "create an SVM with the provided target platform and default parameter object") - .def(py::init(), "create an SVM with the provided target platform and parameter object") + // assemble docstrings + const std::string class_docstring{ fmt::format("A {} using the HIP backend.", csvm_name) }; + const std::string param_docstring{ fmt::format("create a HIP {} with the provided parameters", csvm_name) }; + const std::string target_param_docstring{ fmt::format("create a HIP {} with the provided target platform and parameters", csvm_name) }; + const std::string kwargs_docstring{ fmt::format("create a HIP {} with the provided keyword arguments", csvm_name) }; + const std::string target_kwargs_docstring{ fmt::format("create a HIP {} with the provided target platform and keyword arguments", csvm_name) }; + + py::class_(m, csvm_name.c_str(), class_docstring.c_str()) + .def(py::init(), param_docstring.c_str()) + .def(py::init(), target_param_docstring.c_str()) .def(py::init([](const py::kwargs &args) { // check for valid keys - check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = convert_kwargs_to_parameter(args); - // create CSVM with the default target platform - return std::make_unique(params); + const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); + // create C-SVM with the default target platform + return std::make_unique(params); }), - "create an SVM with the default target platform and keyword arguments") + kwargs_docstring.c_str()) .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { // check for valid keys - 
check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = convert_kwargs_to_parameter(args); - // create CSVM with the provided target platform - return std::make_unique(target, params); + const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); + // create C-SVM with the provided target platform + return std::make_unique(target, params); }), - "create an SVM with the provided target platform and keyword arguments"); + target_kwargs_docstring.c_str()) + .def("__repr__", [csvm_name](const backend_csvm_type &self) { + return fmt::format("", csvm_name, self.num_available_devices()); + }); +} + +} // namespace + +void init_hip_csvm(py::module_ &m, const py::exception &base_exception) { + // use its own submodule for the HIP C-SVM bindings + py::module_ hip_module = m.def_submodule("hip", "a module containing all HIP backend specific functionality"); + const py::module_ hip_pure_virtual_module = hip_module.def_submodule("__pure_virtual", "a module containing all pure-virtual HIP backend specific functionality"); + + // bind the pure-virtual base HIP C-SVM + [[maybe_unused]] const py::class_ virtual_base_hip_csvm(hip_pure_virtual_module, "__pure_virtual_hip_base_CSVM"); + + // bind the specific HIP C-SVC and C-SVR classes + bind_hip_csvms(hip_module, "CSVC"); + bind_hip_csvms(hip_module, "CSVR"); // register HIP backend specific exceptions - register_py_exception(hip_module, "BackendError", base_exception); + plssvm::bindings::python::util::register_py_exception(hip_module, "BackendError", base_exception); } diff --git a/bindings/Python/backends/hpx_csvm.cpp b/bindings/Python/backends/hpx_csvm.cpp index 92b4fef10..48710e4c5 100644 --- 
a/bindings/Python/backends/hpx_csvm.cpp +++ b/bindings/Python/backends/hpx_csvm.cpp @@ -7,51 +7,80 @@ * See the LICENSE.md file in the project root for full license information. */ +#include "plssvm/backend_types.hpp" // plssvm::hpx::backend_csvm_type_t #include "plssvm/backends/HPX/csvm.hpp" // plssvm::hpx::csvm #include "plssvm/backends/HPX/exceptions.hpp" // plssvm::hpx::backend_exception -#include "plssvm/csvm.hpp" // plssvm::csvm #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception #include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/svm/csvc.hpp" // plssvm::csvc +#include "plssvm/svm/csvm.hpp" // plssvm::csvm +#include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/utility.hpp" // check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception} -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init -#include "pybind11/stl.h" // support for STL types +#include "fmt/format.h" // fmt::format +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::exception +#include "pybind11/pytypes.h" // py::kwargs #include // std::make_unique +#include // std::string namespace py = pybind11; -void init_hpx_csvm(py::module_ &m, const py::exception &base_exception) { - // use its own submodule for the HPX CSVM bindings - py::module_ hpx_module = m.def_submodule("hpx", "a module containing all HPX backend specific functionality"); +namespace { + +template +void bind_hpx_csvms(py::module_ &m, const std::string &csvm_name) { + using backend_csvm_type = plssvm::hpx::backend_csvm_type_t; - // bind the CSVM using the HPX backend - py::class_(hpx_module, "CSVM") - .def(py::init<>(), "create an SVM with the automatic target platform and default parameter object") - 
.def(py::init(), "create an SVM with the automatic target platform and provided parameter object") - .def(py::init(), "create an SVM with the provided target platform and default parameter object") - .def(py::init(), "create an SVM with the provided target platform and parameter object") + // assemble docstrings + const std::string class_docstring{ fmt::format("A {} using the HPX backend.", csvm_name) }; + const std::string param_docstring{ fmt::format("create an HPX {} with the provided parameters", csvm_name) }; + const std::string target_param_docstring{ fmt::format("create an HPX {} with the provided target platform and parameters", csvm_name) }; + const std::string kwargs_docstring{ fmt::format("create an HPX {} with the provided keyword arguments", csvm_name) }; + const std::string target_kwargs_docstring{ fmt::format("create an HPX {} with the provided target platform and keyword arguments", csvm_name) }; + + py::class_(m, csvm_name.c_str(), class_docstring.c_str()) + .def(py::init(), param_docstring.c_str()) + .def(py::init(), target_param_docstring.c_str()) .def(py::init([](const py::kwargs &args) { // check for valid keys - check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = convert_kwargs_to_parameter(args); - // create CSVM with the default target platform - return std::make_unique(params); + const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); + // create C-SVM with the default target platform + return std::make_unique(params); }), - "create an SVM with the default target platform and keyword arguments") + kwargs_docstring.c_str()) .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { // check for valid 
keys - check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = convert_kwargs_to_parameter(args); - // create CSVM with the provided target platform - return std::make_unique(target, params); + const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); + // create C-SVM with the provided target platform + return std::make_unique(target, params); }), - "create an SVM with the provided target platform and keyword arguments"); + target_kwargs_docstring.c_str()) + .def("__repr__", [csvm_name](const backend_csvm_type &self) { + return fmt::format("", csvm_name, self.num_available_devices()); + }); +} + +} // namespace + +void init_hpx_csvm(py::module_ &m, const py::exception &base_exception) { + // use its own submodule for the HPX C-SVM bindings + py::module_ hpx_module = m.def_submodule("hpx", "a module containing all HPX backend specific functionality"); + const py::module_ hpx_pure_virtual_module = hpx_module.def_submodule("__pure_virtual", "a module containing all pure-virtual HPX backend specific functionality"); + + // bind the pure-virtual base HPX C-SVM + [[maybe_unused]] const py::class_ virtual_base_hpx_csvm(hpx_pure_virtual_module, "__pure_virtual_hpx_base_CSVM"); + + // bind the specific HPX C-SVC and C-SVR classes + bind_hpx_csvms(hpx_module, "CSVC"); + bind_hpx_csvms(hpx_module, "CSVR"); // register HPX backend specific exceptions - register_py_exception(hpx_module, "BackendError", base_exception); + plssvm::bindings::python::util::register_py_exception(hpx_module, "BackendError", base_exception); } diff --git a/bindings/Python/backends/kokkos_csvm.cpp b/bindings/Python/backends/kokkos_csvm.cpp index ea6c4af80..884e8a68f 100644 --- 
a/bindings/Python/backends/kokkos_csvm.cpp +++ b/bindings/Python/backends/kokkos_csvm.cpp @@ -6,72 +6,102 @@ * See the LICENSE.md file in the project root for full license information. */ +#include "plssvm/backend_types.hpp" // plssvm::kokkos::backend_csvm_type_t #include "plssvm/backends/Kokkos/csvm.hpp" // plssvm::kokkos::csvm #include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space -#include "plssvm/csvm.hpp" // plssvm::csvm #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception -#include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::kokkos_execution_space +#include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/svm/csvc.hpp" // plssvm::csvc +#include "plssvm/svm/csvm.hpp" // plssvm::csvm +#include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/utility.hpp" // check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception} -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init -#include "pybind11/stl.h" // support for STL types +#include "fmt/format.h" // fmt::format +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::exception +#include "pybind11/pytypes.h" // py::kwargs #include // std::make_unique +#include // std::string namespace py = pybind11; -void init_kokkos_csvm(py::module_ &m, const py::exception &base_exception) { - // use its own submodule for the Kokkos CSVM bindings - py::module_ kokkos_module = m.def_submodule("kokkos", "a module containing all Kokkos backend specific functionality"); +namespace { + +template +void bind_kokkos_csvms(py::module_ &m, const std::string &csvm_name) { + using backend_csvm_type = 
plssvm::kokkos::backend_csvm_type_t; - // bind the CSVM using the Kokkos backend - py::class_(kokkos_module, "CSVM") - .def(py::init<>(), "create an SVM with the automatic target platform and default parameter object") - .def(py::init(), "create an SVM with the automatic target platform and provided parameter object") - .def(py::init(), "create an SVM with the provided target platform and default parameter object") - .def(py::init(), "create an SVM with the provided target platform and parameter object") + // assemble docstrings + const std::string class_docstring{ fmt::format("A {} using the Kokkos backend.", csvm_name) }; + const std::string param_docstring{ fmt::format("create a Kokkos {} with the provided parameters and optional Kokkos specific keyword arguments", csvm_name) }; + const std::string target_param_docstring{ fmt::format("create a Kokkos {} with the provided target platform, parameters, and optional Kokkos specific keyword arguments", csvm_name) }; + const std::string kwargs_docstring{ fmt::format("create a Kokkos {} with the provided keyword arguments (including optional Kokkos specific keyword arguments)", csvm_name) }; + const std::string target_kwargs_docstring{ fmt::format("create a Kokkos {} with the provided target platform and keyword arguments (including optional Kokkos specific keyword arguments)", csvm_name) }; + + py::class_(m, csvm_name.c_str(), class_docstring.c_str()) + .def(py::init([](const plssvm::parameter params, const py::kwargs &args) { + // check for valid keys + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kokkos_execution_space" }); + // set Kokkos execution space + const plssvm::kokkos::execution_space space = args.contains("kokkos_execution_space") ? 
args["kokkos_execution_space"].cast() : plssvm::kokkos::execution_space::automatic; + // create C-SVM with the default target platform + return std::make_unique(params, plssvm::kokkos_execution_space = space); + }), + param_docstring.c_str()) + .def(py::init([](const plssvm::target_platform target, const plssvm::parameter params, const py::kwargs &args) { + // check for valid keys + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kokkos_execution_space" }); + // set Kokkos execution space + const plssvm::kokkos::execution_space space = args.contains("kokkos_execution_space") ? args["kokkos_execution_space"].cast() : plssvm::kokkos::execution_space::automatic; + // create C-SVM with the provided target platform + return std::make_unique(target, params, plssvm::kokkos_execution_space = space); + }), + target_param_docstring.c_str()) .def(py::init([](const py::kwargs &args) { // check for valid keys - check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "kokkos_execution_space" }); + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "kokkos_execution_space" }); // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = convert_kwargs_to_parameter(args); + const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); // set Kokkos execution space const plssvm::kokkos::execution_space space = args.contains("kokkos_execution_space") ? 
args["kokkos_execution_space"].cast() : plssvm::kokkos::execution_space::automatic; - // create CSVM with the default target platform - return std::make_unique(params, plssvm::kokkos_execution_space = space); + // create C-SVM with the default target platform + return std::make_unique(params, plssvm::kokkos_execution_space = space); }), - "create an SVM with the default target platform and keyword arguments") + kwargs_docstring.c_str()) .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { // check for valid keys - check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "kokkos_execution_space" }); + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "kokkos_execution_space" }); // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = convert_kwargs_to_parameter(args); + const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); // set Kokkos execution space const plssvm::kokkos::execution_space space = args.contains("kokkos_execution_space") ? 
args["kokkos_execution_space"].cast() : plssvm::kokkos::execution_space::automatic; - // create CSVM with the provided target platform - return std::make_unique(target, params, plssvm::kokkos_execution_space = space); + // create C-SVM with the provided target platform + return std::make_unique(target, params, plssvm::kokkos_execution_space = space); }), - "create an SVM with the provided target platform and keyword arguments") - .def("get_execution_space", &plssvm::kokkos::csvm::get_execution_space, "get the Kokkos execution space used in this Kokkos SVM"); + target_kwargs_docstring.c_str()) + .def("get_execution_space", &plssvm::kokkos::csvm::get_execution_space, "get the Kokkos execution space used in this Kokkos C-SVM") + .def("__repr__", [csvm_name](const backend_csvm_type &self) { + return fmt::format("", csvm_name, self.num_available_devices(), self.get_execution_space()); + }); +} - // register Kokkos backend specific exceptions - register_py_exception(kokkos_module, "BackendError", base_exception); +} // namespace - // bind the execution space enum classes - py::enum_(kokkos_module, "ExecutionSpace") - .value("AUTOMATIC", plssvm::kokkos::execution_space::cuda, "automatically determine the used Kokkos execution space (note: this does not necessarily correspond to Kokkos::DefaultExecutionSpace)") - .value("CUDA", plssvm::kokkos::execution_space::cuda, "execution space representing execution on a CUDA device") - .value("HIP", plssvm::kokkos::execution_space::hip, "execution space representing execution on a device supported by HIP") - .value("SYCL", plssvm::kokkos::execution_space::sycl, "execution space representing execution on a device supported by SYCL") - .value("HPX", plssvm::kokkos::execution_space::hpx, "execution space representing execution with the HPX runtime system") - .value("OPENMP", plssvm::kokkos::execution_space::openmp, "execution space representing execution with the OpenMP runtime system") - .value("OPENMPTARGET", 
plssvm::kokkos::execution_space::openmp_target, "execution space representing execution using the target offloading feature of the OpenMP runtime system") - .value("OPENACC", plssvm::kokkos::execution_space::openacc, "execution space representing execution with the OpenACC runtime system") - .value("THREADS", plssvm::kokkos::execution_space::threads, "execution space representing parallel execution with std::threads") - .value("SERIAL", plssvm::kokkos::execution_space::serial, "execution space representing serial execution on the CPU; should always be available"); +void init_kokkos_csvm(py::module_ &m, const py::exception &base_exception) { + // use its own submodule for the Kokkos C-SVM bindings + py::module_ kokkos_module = m.def_submodule("kokkos", "a module containing all Kokkos backend specific functionality"); + const py::module_ kokkos_pure_virtual_module = kokkos_module.def_submodule("__pure_virtual", "a module containing all pure-virtual Kokkos backend specific functionality"); + + // bind the pure-virtual base Kokkos C-SVM + [[maybe_unused]] const py::class_ virtual_base_kokkos_csvm(kokkos_pure_virtual_module, "__pure_virtual_kokkos_base_CSVM"); - kokkos_module.def("list_available_execution_spaces", &plssvm::kokkos::list_available_execution_spaces, "list all available Kokkos execution spaces"); + // bind the specific Kokkos C-SVC and C-SVR classes + bind_kokkos_csvms(kokkos_module, "CSVC"); + bind_kokkos_csvms(kokkos_module, "CSVR"); + + // register Kokkos backend specific exceptions + plssvm::bindings::python::util::register_py_exception(kokkos_module, "BackendError", base_exception); } diff --git a/bindings/Python/backends/opencl_csvm.cpp b/bindings/Python/backends/opencl_csvm.cpp index d9c74f4c2..77ae0f2d8 100644 --- a/bindings/Python/backends/opencl_csvm.cpp +++ b/bindings/Python/backends/opencl_csvm.cpp @@ -6,51 +6,80 @@ * See the LICENSE.md file in the project root for full license information. 
*/ +#include "plssvm/backend_types.hpp" // plssvm::opencl::backend_csvm_type_t #include "plssvm/backends/OpenCL/csvm.hpp" // plssvm::opencl::csvm #include "plssvm/backends/OpenCL/exceptions.hpp" // plssvm::opencl::backend_exception -#include "plssvm/csvm.hpp" // plssvm::csvm #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception #include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/svm/csvc.hpp" // plssvm::csvc +#include "plssvm/svm/csvm.hpp" // plssvm::csvm +#include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/utility.hpp" // check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception} -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init -#include "pybind11/stl.h" // support for STL types +#include "fmt/format.h" // fmt::format +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::exception +#include "pybind11/pytypes.h" // py::kwargs #include // std::make_unique +#include // std::string namespace py = pybind11; -void init_opencl_csvm(py::module_ &m, const py::exception &base_exception) { - // use its own submodule for the OpenCL CSVM bindings - py::module_ opencl_module = m.def_submodule("opencl", "a module containing all OpenCL backend specific functionality"); +namespace { + +template +void bind_opencl_csvms(py::module_ &m, const std::string &csvm_name) { + using backend_csvm_type = plssvm::opencl::backend_csvm_type_t; - // bind the CSVM using the OpenCL backend - py::class_(opencl_module, "CSVM") - .def(py::init<>(), "create an SVM with the automatic target platform and default parameter object") - .def(py::init(), "create an SVM with the automatic target platform and provided parameter object") - .def(py::init(), "create an SVM with the 
provided target platform and default parameter object") - .def(py::init(), "create an SVM with the provided target platform and parameter object") + // assemble docstrings + const std::string class_docstring{ fmt::format("A {} using the OpenCL backend.", csvm_name) }; + const std::string param_docstring{ fmt::format("create an OpenCL {} with provided parameters", csvm_name) }; + const std::string target_param_docstring{ fmt::format("create an OpenCL {} with the provided target platform and parameters", csvm_name) }; + const std::string kwargs_docstring{ fmt::format("create an OpenCL {} with the provided keyword arguments", csvm_name) }; + const std::string target_kwargs_docstring{ fmt::format("create an OpenCL {} with the provided target platform and keyword arguments", csvm_name) }; + + py::class_(m, csvm_name.c_str(), class_docstring.c_str()) + .def(py::init(), param_docstring.c_str()) + .def(py::init(), target_param_docstring.c_str()) .def(py::init([](const py::kwargs &args) { // check for valid keys - check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = convert_kwargs_to_parameter(args); - // create CSVM with the default target platform - return std::make_unique(params); + const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); + // create C-SVM with the default target platform + return std::make_unique(params); }), - "create an SVM with the default target platform and keyword arguments") + kwargs_docstring.c_str()) .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { // check for valid keys - check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); + 
plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = convert_kwargs_to_parameter(args); - // create CSVM with the provided target platform - return std::make_unique(target, params); + const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); + // create C-SVM with the provided target platform + return std::make_unique(target, params); }), - "create an SVM with the provided target platform and keyword arguments"); + target_kwargs_docstring.c_str()) + .def("__repr__", [csvm_name](const backend_csvm_type &self) { + return fmt::format("", csvm_name, self.num_available_devices()); + }); +} + +} // namespace + +void init_opencl_csvm(py::module_ &m, const py::exception &base_exception) { + // use its own submodule for the OpenCL C-SVM bindings + py::module_ opencl_module = m.def_submodule("opencl", "a module containing all OpenCL backend specific functionality"); + const py::module_ opencl_pure_virtual_module = opencl_module.def_submodule("__pure_virtual", "a module containing all pure-virtual OpenCL backend specific functionality"); + + // bind the pure-virtual base OpenCL C-SVM + [[maybe_unused]] const py::class_ virtual_base_opencl_csvm(opencl_pure_virtual_module, "__pure_virtual_opencl_base_CSVM"); + + // bind the specific OpenCL C-SVC and C-SVR classes + bind_opencl_csvms(opencl_module, "CSVC"); + bind_opencl_csvms(opencl_module, "CSVR"); // register OpenCL backend specific exceptions - register_py_exception(opencl_module, "BackendError", base_exception); + plssvm::bindings::python::util::register_py_exception(opencl_module, "BackendError", base_exception); } diff --git a/bindings/Python/backends/openmp_csvm.cpp b/bindings/Python/backends/openmp_csvm.cpp index 099e73e98..376329474 100644 --- a/bindings/Python/backends/openmp_csvm.cpp +++ 
b/bindings/Python/backends/openmp_csvm.cpp @@ -6,51 +6,80 @@ * See the LICENSE.md file in the project root for full license information. */ +#include "plssvm/backend_types.hpp" // plssvm::openmp::backend_csvm_type_t #include "plssvm/backends/OpenMP/csvm.hpp" // plssvm::openmp::csvm #include "plssvm/backends/OpenMP/exceptions.hpp" // plssvm::openmp::backend_exception -#include "plssvm/csvm.hpp" // plssvm::csvm #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception #include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/svm/csvc.hpp" // plssvm::csvc +#include "plssvm/svm/csvm.hpp" // plssvm::csvm +#include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/utility.hpp" // check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception} -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init -#include "pybind11/stl.h" // support for STL types +#include "fmt/format.h" // fmt::format +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::exception +#include "pybind11/pytypes.h" // py::kwargs #include // std::make_unique +#include // std::string namespace py = pybind11; -void init_openmp_csvm(py::module_ &m, const py::exception &base_exception) { - // use its own submodule for the OpenMP CSVM bindings - py::module_ openmp_module = m.def_submodule("openmp", "a module containing all OpenMP backend specific functionality"); +namespace { + +template +void bind_openmp_csvms(py::module_ &m, const std::string &csvm_name) { + using backend_csvm_type = plssvm::openmp::backend_csvm_type_t; - // bind the CSVM using the OpenMP backend - py::class_(openmp_module, "CSVM") - .def(py::init<>(), "create an SVM with the automatic target platform and default parameter object") - 
.def(py::init(), "create an SVM with the automatic target platform and provided parameter object") - .def(py::init(), "create an SVM with the provided target platform and default parameter object") - .def(py::init(), "create an SVM with the provided target platform and parameter object") + // assemble docstrings + const std::string class_docstring{ fmt::format("A {} using the OpenMP backend.", csvm_name) }; + const std::string param_docstring{ fmt::format("create an OpenMP {} with the provided parameters", csvm_name) }; + const std::string target_param_docstring{ fmt::format("create an OpenMP {} with the provided target platform and parameters", csvm_name) }; + const std::string kwargs_docstring{ fmt::format("create an OpenMP {} with the provided keyword arguments", csvm_name) }; + const std::string target_kwargs_docstring{ fmt::format("create an OpenMP {} with the provided target platform and keyword arguments", csvm_name) }; + + py::class_(m, csvm_name.c_str(), class_docstring.c_str()) + .def(py::init(), param_docstring.c_str()) + .def(py::init(), target_param_docstring.c_str()) .def(py::init([](const py::kwargs &args) { // check for valid keys - check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = convert_kwargs_to_parameter(args); - // create CSVM with the default target platform - return std::make_unique(params); + const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); + // create C-SVM with the default target platform + return std::make_unique(params); }), - "create an SVM with the default target platform and keyword arguments") + kwargs_docstring.c_str()) .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { // 
check for valid keys - check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = convert_kwargs_to_parameter(args); - // create CSVM with the provided target platform - return std::make_unique(target, params); + const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); + // create C-SVM with the provided target platform + return std::make_unique(target, params); }), - "create an SVM with the provided target platform and keyword arguments"); + target_kwargs_docstring.c_str()) + .def("__repr__", [csvm_name](const backend_csvm_type &self) { + return fmt::format("", csvm_name, self.num_available_devices()); + }); +} + +} // namespace + +void init_openmp_csvm(py::module_ &m, const py::exception &base_exception) { + // use its own submodule for the OpenMP C-SVM bindings + py::module_ openmp_module = m.def_submodule("openmp", "a module containing all OpenMP backend specific functionality"); + const py::module_ openmp_pure_virtual_module = openmp_module.def_submodule("__pure_virtual", "a module containing all pure-virtual OpenMP backend specific functionality"); + + // bind the pure-virtual base OpenMP C-SVM + [[maybe_unused]] const py::class_ virtual_base_openmp_csvm(openmp_pure_virtual_module, "__pure_virtual_openmp_base_CSVM"); + + // bind the specific OpenMP C-SVC and C-SVR classes + bind_openmp_csvms(openmp_module, "CSVC"); + bind_openmp_csvms(openmp_module, "CSVR"); // register OpenMP backend specific exceptions - register_py_exception(openmp_module, "BackendError", base_exception); + plssvm::bindings::python::util::register_py_exception(openmp_module, "BackendError", base_exception); } diff --git a/bindings/Python/backends/stdpar_csvm.cpp 
b/bindings/Python/backends/stdpar_csvm.cpp index 945dd104f..f1dadad50 100644 --- a/bindings/Python/backends/stdpar_csvm.cpp +++ b/bindings/Python/backends/stdpar_csvm.cpp @@ -6,29 +6,77 @@ * See the LICENSE.md file in the project root for full license information. */ +#include "plssvm/backend_types.hpp" // plssvm::stdpar::backend_csvm_type_t #include "plssvm/backends/stdpar/csvm.hpp" // plssvm::stdpar::csvm #include "plssvm/backends/stdpar/exceptions.hpp" // plssvm::stdpar::backend_exception #include "plssvm/backends/stdpar/implementation_types.hpp" // plssvm::stdpar::implementation_type -#include "plssvm/csvm.hpp" // plssvm::csvm #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception #include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/svm/csvc.hpp" // plssvm::csvc +#include "plssvm/svm/csvm.hpp" // plssvm::csvm +#include "plssvm/svm/csvr.hpp" // plssvm::csvr #include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "bindings/Python/utility.hpp" // check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception} -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init +#include "fmt/format.h" // fmt::format +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::exception #include "pybind11/pytypes.h" // py::kwargs #include // std::make_unique +#include // std::string namespace py = pybind11; +namespace { + +template +void bind_stdpar_csvms(py::module_ &m, const std::string &csvm_name) { + using backend_csvm_type = plssvm::stdpar::backend_csvm_type_t; + + // assemble docstrings + const std::string class_docstring{ fmt::format("A {} using the stdpar backend.", csvm_name) }; + const std::string param_docstring{ fmt::format("create an stdpar {} with the provided parameters", csvm_name) }; + const std::string 
target_param_docstring{ fmt::format("create an stdpar {} with the provided target platform and parameters", csvm_name) }; + const std::string kwargs_docstring{ fmt::format("create an stdpar {} with the provided keyword arguments", csvm_name) }; + const std::string target_kwargs_docstring{ fmt::format("create an stdpar {} with the provided target platform and keyword arguments", csvm_name) }; + + py::class_(m, csvm_name.c_str()) + .def(py::init(), param_docstring.c_str()) + .def(py::init(), target_param_docstring.c_str()) + .def(py::init([](const py::kwargs &args) { + // check for valid keys + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); + // if one of the value keyword parameter is provided, set the respective value + const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); + // create C-SVM with the default target platform + return std::make_unique(params); + }), + kwargs_docstring.c_str()) + .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { + // check for valid keys + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); + // if one of the value keyword parameter is provided, set the respective value + const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args); + // create C-SVM with the provided target platform + return std::make_unique(target, params); + }), + target_kwargs_docstring.c_str()) + .def("get_implementation_type", &plssvm::stdpar::csvm::get_implementation_type, "get the stdpar implementation used in this stdpar C-SVM") + .def("__repr__", [csvm_name](const backend_csvm_type &self) { + return fmt::format("", csvm_name, self.num_available_devices(), self.get_implementation_type()); + }); +} + +} // namespace + void init_stdpar_csvm(py::module_ &m, const py::exception &base_exception) { - // use 
its own submodule for the stdpar CSVM bindings + // use its own submodule for the stdpar C-SVM bindings py::module_ stdpar_module = m.def_submodule("stdpar", "a module containing all stdpar backend specific functionality"); + const py::module_ stdpar_pure_virtual_module = stdpar_module.def_submodule("__pure_virtual", "a module containing all pure-virtual stdpar backend specific functionality"); // bind the enum class - py::enum_(stdpar_module, "ImplementationType") + py::enum_(stdpar_module, "ImplementationType", "Enum class for all supported stdpar implementations in PLSSVM.") .value("NVHPC", plssvm::stdpar::implementation_type::nvhpc, "use NVIDIA's HPC SDK (NVHPC) compiler nvc++") .value("ROC_STDPAR", plssvm::stdpar::implementation_type::roc_stdpar, "use AMD's roc-stdpar compiler (patched LLVM)") .value("INTEL_LLVM", plssvm::stdpar::implementation_type::intel_llvm, "use Intel's LLVM compiler icpx") @@ -37,32 +85,13 @@ void init_stdpar_csvm(py::module_ &m, const py::exception &ba stdpar_module.def("list_available_stdpar_implementations", &plssvm::stdpar::list_available_stdpar_implementations, "list all available stdpar implementations"); - // bind the CSVM using the stdpar backend - py::class_(stdpar_module, "CSVM") - .def(py::init<>(), "create an SVM with the automatic target platform and default parameter object") - .def(py::init(), "create an SVM with the automatic target platform and provided parameter object") - .def(py::init(), "create an SVM with the provided target platform and default parameter object") - .def(py::init(), "create an SVM with the provided target platform and parameter object") - .def(py::init([](const py::kwargs &args) { - // check for valid keys - check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); - // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = convert_kwargs_to_parameter(args); - // create CSVM with the default target platform - 
return std::make_unique(params); - }), - "create an SVM with the default target platform and keyword arguments") - .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { - // check for valid keys - check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); - // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = convert_kwargs_to_parameter(args); - // create CSVM with the provided target platform - return std::make_unique(target, params); - }), - "create an SVM with the provided target platform and keyword arguments") - .def("get_implementation_type", &plssvm::stdpar::csvm::get_implementation_type, "get the stdpar implementation type used in this stdpar SVM"); + // bind the pure-virtual base stdpar C-SVM + [[maybe_unused]] const py::class_ virtual_base_stdpar_csvm(stdpar_pure_virtual_module, "__pure_virtual_stdpar_base_CSVM"); + + // bind the specific stdpar C-SVC and C-SVR classes + bind_stdpar_csvms(stdpar_module, "CSVC"); + bind_stdpar_csvms(stdpar_module, "CSVR"); // register stdpar backend specific exceptions - register_py_exception(stdpar_module, "BackendError", base_exception); + plssvm::bindings::python::util::register_py_exception(stdpar_module, "BackendError", base_exception); } diff --git a/bindings/Python/backends/sycl.cpp b/bindings/Python/backends/sycl.cpp index 891cb9898..0421fc308 100644 --- a/bindings/Python/backends/sycl.cpp +++ b/bindings/Python/backends/sycl.cpp @@ -11,7 +11,7 @@ #include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception -#include "bindings/Python/utility.hpp" // register_py_exception +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_py_exception #include "pybind11/pybind11.h" // py::module_, py::enum_, py::exception #include "pybind11/stl.h" // support for STL types: 
std:vector @@ -31,17 +31,17 @@ void init_sycl(py::module_ &m, const py::exception &base_exce py::module_ sycl_module = m.def_submodule("sycl", "a module containing all SYCL backend specific functionality"); // register SYCL backend specific exceptions - register_py_exception(sycl_module, "BackendError", base_exception); + plssvm::bindings::python::util::register_py_exception(sycl_module, "BackendError", base_exception); // bind the two enum classes - py::enum_(sycl_module, "ImplementationType") + py::enum_(sycl_module, "ImplementationType", "Enum class for all supported SYCL implementation in PLSSVM.") .value("AUTOMATIC", plssvm::sycl::implementation_type::automatic, "use the available SYCL implementation; if more than one implementation is available, the macro PLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION must be defined during the CMake configuration") .value("DPCPP", plssvm::sycl::implementation_type::dpcpp, "use DPC++ as SYCL implementation") .value("ADAPTIVECPP", plssvm::sycl::implementation_type::adaptivecpp, "use AdaptiveCpp (formerly known as hipSYCL) as SYCL implementation"); sycl_module.def("list_available_sycl_implementations", &plssvm::sycl::list_available_sycl_implementations, "list all available SYCL implementations"); - py::enum_(sycl_module, "KernelInvocationType") + py::enum_(sycl_module, "KernelInvocationType", "Enum class for all possible SYCL kernel invocation types supported in PLSSVM.") .value("AUTOMATIC", plssvm::sycl::kernel_invocation_type::automatic, "use the best kernel invocation type for the current SYCL implementation and target hardware platform") .value("ND_RANGE", plssvm::sycl::kernel_invocation_type::nd_range, "use the nd_range kernel invocation type"); @@ -53,6 +53,7 @@ void init_sycl(py::module_ &m, const py::exception &base_exce const py::module_ dpcpp_module = init_dpcpp_csvm(m, base_exception); #endif - // "alias" one of the DPC++ or AdaptiveCpp CSVMs to be the default SYCL CSVM - sycl_module.attr("CSVM") = 
PLSSVM_CONCATENATE(PLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION, _module).attr("CSVM"); + // "alias" one of the DPC++ or AdaptiveCpp C-SVCs and C-SVRs to be the respective default SYCL C-SVC and C-SVR + sycl_module.attr("CSVC") = PLSSVM_CONCATENATE(PLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION, _module).attr("CSVC"); + sycl_module.attr("CSVR") = PLSSVM_CONCATENATE(PLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION, _module).attr("CSVR"); } diff --git a/bindings/Python/classification_types.cpp b/bindings/Python/classification_types.cpp index a66cf7b3f..c429973c7 100644 --- a/bindings/Python/classification_types.cpp +++ b/bindings/Python/classification_types.cpp @@ -16,7 +16,7 @@ namespace py = pybind11; void init_classification_types(py::module_ &m) { // bind enum class - py::enum_(m, "ClassificationType") + py::enum_(m, "ClassificationType", "Enum class for all implemented multiclass classification strategies.") .value("OAA", plssvm::classification_type::oaa, "use the one vs. all classification strategy (default)") .value("OAO", plssvm::classification_type::oao, "use the one vs. one classification strategy"); diff --git a/bindings/Python/csvm.cpp b/bindings/Python/csvm.cpp deleted file mode 100644 index 35dff1c54..000000000 --- a/bindings/Python/csvm.cpp +++ /dev/null @@ -1,178 +0,0 @@ -/** - * @author Alexander Van Craen - * @author Marcel Breyer - * @copyright 2018-today The PLSSVM project - All Rights Reserved - * @license This file is part of the PLSSVM project which is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. 
- */ - -#include "plssvm/csvm.hpp" // plssvm::csvm - -#include "plssvm/backend_types.hpp" // plssvm::backend_type, plssvm::determine_default_backend, plssvm::list_available_backends -#include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::implementation_type -#include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type -#include "plssvm/classification_types.hpp" // plssvm::classification_type -#include "plssvm/constants.hpp" // plssvm::real_type -#include "plssvm/csvm_factory.hpp" // plssvm::make_csvm -#include "plssvm/data_set.hpp" // plssvm::data_set -#include "plssvm/detail/type_list.hpp" // plssvm::detail::supported_label_types -#include "plssvm/model.hpp" // plssvm::model -#include "plssvm/parameter.hpp" // plssvm::parameter, named parameters -#include "plssvm/solver_types.hpp" // plssvm::solver_type -#include "plssvm/target_platforms.hpp" // plssvm::target_platform, plssvm::determine_default_target_platform, plssvm::list_available_target_platforms - -#include "bindings/Python/utility.hpp" // check_kwargs_for_correctness, convert_kwargs_to_parameter - -#include "fmt/format.h" // fmt::format -#include "fmt/ranges.h" // fmt::join -#include "pybind11/pybind11.h" // py::module_, py::class_, py::kwargs, py::overload_cast, py::const_ - -#include // std::size_t -#include // std::unique_ptr -#include // std::istringstream -#include // std::string -#include // std::tuple_element_t, std::tuple_size_v -#include // std::is_same_v -#include // std::integer_sequence, std::make_integer_sequence - -namespace py = pybind11; - -template -void instantiate_csvm_functions(py::class_ &c, label_type) { - c.def( - "fit", [](const plssvm::csvm &self, const plssvm::data_set &data, const py::kwargs &args) { - // check keyword arguments - check_kwargs_for_correctness(args, { "epsilon", "max_iter", "classification", "solver" }); - - auto epsilon{ plssvm::real_type{ 0.001 } }; - if (args.contains("epsilon")) { - epsilon = 
args["epsilon"].cast(); - } - - // can't do it with max_iter due to OAO splitting the data set - - plssvm::classification_type classification{ plssvm::classification_type::oaa }; - if (args.contains("classification")) { - classification = args["classification"].cast(); - } - - plssvm::solver_type solver{ plssvm::solver_type::automatic }; - if (args.contains("solver")) { - solver = args["solver"].cast(); - } - - if (args.contains("max_iter")) { - return self.fit(data, - plssvm::epsilon = epsilon, - plssvm::max_iter = args["max_iter"].cast(), - plssvm::classification = classification, - plssvm::solver = solver); - } else { - return self.fit(data, - plssvm::epsilon = epsilon, - plssvm::classification = classification, - plssvm::solver = solver); - } - }, - "fit a model using the current SVM on the provided data") - .def("predict", [](const plssvm::csvm &self, const plssvm::model &model, const plssvm::data_set &data) { - if constexpr (std::is_same_v) { - return self.predict(model, data); - } else { - return vector_to_pyarray(self.predict(model, data)); - } }, "predict the labels for a data set using a previously learned model") - .def("score", py::overload_cast &>(&plssvm::csvm::score, py::const_), "calculate the accuracy of the model") - .def("score", py::overload_cast &, const plssvm::data_set &>(&plssvm::csvm::score, py::const_), "calculate the accuracy of a data set using the model"); -} - -template -void instantiate_csvm_functions(py::class_ &c, std::integer_sequence) { - (instantiate_csvm_functions(c, std::tuple_element_t{}), ...); -} - -template -void instantiate_model_bindings(py::class_ &c) { - instantiate_csvm_functions(c, std::make_integer_sequence>{}); -} - -std::unique_ptr assemble_csvm(const py::kwargs &args, plssvm::parameter input_params = {}) { - // check keyword arguments - check_kwargs_for_correctness(args, { "backend", "target_platform", "kernel_type", "degree", "gamma", "coef0", "cost", "sycl_implementation_type", "sycl_kernel_invocation_type" }); 
- // if one of the value keyword parameter is provided, set the respective value - const plssvm::parameter params = convert_kwargs_to_parameter(args, input_params); - plssvm::backend_type backend = plssvm::determine_default_backend(); - if (args.contains("backend")) { - if (py::isinstance(args["backend"])) { - std::istringstream iss{ args["backend"].cast() }; - iss >> backend; - if (iss.fail()) { - throw py::value_error{ fmt::format("Available backends are \"{}\", got {}!", fmt::join(plssvm::list_available_backends(), ";"), args["backend"].cast()) }; - } - } else { - backend = args["backend"].cast(); - } - } - plssvm::target_platform target = plssvm::determine_default_target_platform(); - if (args.contains("target_platform")) { - if (py::isinstance(args["target_platform"])) { - std::istringstream iss{ args["target_platform"].cast() }; - iss >> target; - if (iss.fail()) { - throw py::value_error{ fmt::format("Available target platforms are \"{}\", got {}!", fmt::join(plssvm::list_available_target_platforms(), ";"), args["target_platform"].cast()) }; - } - } else { - target = args["target_platform"].cast(); - } - } - - // parse SYCL specific keyword arguments - if (backend == plssvm::backend_type::sycl) { - // sycl specific flags - plssvm::sycl::implementation_type impl_type = plssvm::sycl::implementation_type::automatic; - if (args.contains("sycl_implementation_type")) { - impl_type = args["sycl_implementation_type"].cast(); - } - plssvm::sycl::kernel_invocation_type invocation_type = plssvm::sycl::kernel_invocation_type::automatic; - if (args.contains("sycl_kernel_invocation_type")) { - invocation_type = args["sycl_kernel_invocation_type"].cast(); - } - - return plssvm::make_csvm(backend, target, params, plssvm::sycl_implementation_type = impl_type, plssvm::sycl_kernel_invocation_type = invocation_type); - } else { - return plssvm::make_csvm(backend, target, params); - } -} - -void init_csvm(py::module_ &m) { - const py::module_ pure_virtual_model = 
m.def_submodule("__pure_virtual"); - - py::class_ pycsvm(pure_virtual_model, "__pure_virtual_base_CSVM"); - pycsvm.def("get_params", &plssvm::csvm::get_params, "get the parameter used for this SVM") - .def( - "set_params", [](plssvm::csvm &self, const plssvm::parameter ¶ms) { - self.set_params(params); - }, - "update the parameter used for this SVM using a plssvm.Parameter object") - .def("set_params", [](plssvm::csvm &self, const py::kwargs &args) { - // check keyword arguments - check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); - // convert kwargs to parameter and update csvm internal parameter - self.set_params(convert_kwargs_to_parameter(args, self.get_params())); }, "update the parameter used for this SVM using keyword arguments") - .def("get_target_platform", &plssvm::csvm::get_target_platform, "get the actual target platform this SVM runs on") - .def("num_available_devices", &plssvm::csvm::num_available_devices, "get the number of available devices for the current SVM"); - - // instantiate all functions using all available label_type - instantiate_model_bindings(pycsvm); - - // bind plssvm::make_csvm factory function to a "generic" Python csvm class - py::class_(m, "CSVM", pycsvm, py::module_local()) - // IMPLICIT BACKEND - .def(py::init([](const py::kwargs &args) { - return assemble_csvm(args); - }), - "create an SVM with the provided keyword arguments") - .def(py::init([](const plssvm::parameter ¶ms, const py::kwargs &args) { - return assemble_csvm(args, params); - }), - "create an SVM with the provided parameters and keyword arguments; the values in params will be overwritten by the keyword arguments"); -} diff --git a/bindings/Python/data_set.cpp b/bindings/Python/data_set.cpp deleted file mode 100644 index 9a1278cfa..000000000 --- a/bindings/Python/data_set.cpp +++ /dev/null @@ -1,246 +0,0 @@ -/** - * @author Alexander Van Craen - * @author Marcel Breyer - * @copyright 2018-today The PLSSVM project - All Rights 
Reserved - * @license This file is part of the PLSSVM project which is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. - */ - -#include "plssvm/data_set.hpp" // plssvm::data_set - -#include "plssvm/constants.hpp" // plssvm::real_type -#include "plssvm/detail/type_list.hpp" // plssvm::detail::supported_label_types -#include "plssvm/file_format_types.hpp" // plssvm::file_format_type - -#include "bindings/Python/utility.hpp" // check_kwargs_for_correctness, assemble_unique_class_name, - // pyarray_to_vector, pyarray_to_string_vector, pylist_to_string_vector, pyarray_to_matrix - -#include "fmt/format.h" // fmt::format -#include "fmt/ranges.h" // fmt::join -#include "pybind11/numpy.h" // py::array_t -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::return_value_policy, py::arg, py::kwargs, py::value_error, py::pos_only, py::list -#include "pybind11/stl.h" // support for STL types - -#include // std::array -#include // std::size_t -#include // std::string -#include // std::tuple_element_t, std::tuple_size_v -#include // std::is_same_v -#include // std::integer_sequence, std::make_integer_sequence - -namespace py = pybind11; - -template -typename data_set_type::scaling create_scaling_object(const py::kwargs &args) { - if (args.contains("scaling")) { - typename data_set_type::scaling scaling{ plssvm::real_type{ -1.0 }, plssvm::real_type{ 1.0 } }; - - // try to directly convert it to a plssvm::data_set_type::scaling object - try { - scaling = args["scaling"].cast(); - } catch (const py::cast_error &) { - // can't cast to plssvm::data_set_type::scaling - // -> try a std::array instead! - try { - const auto interval = args["scaling"].cast>(); - scaling = typename data_set_type::scaling{ interval[0], interval[1] }; - } catch (...) 
{ - // rethrow exception if this also did not succeed - throw; - } - } - return scaling; - } else { - throw py::attribute_error{ "Can't extract scaling information, no scaling keyword argument given!" }; - } -} - -template -void instantiate_data_set_bindings(py::module_ &m, label_type) { - using data_set_type = plssvm::data_set; - using size_type = typename data_set_type::size_type; - - // create the Python type names based on the provided real_type and label_type - const std::string class_name_scaling_factors = assemble_unique_class_name("DataSetScalingFactors"); - const std::string class_name_scaling = assemble_unique_class_name("DataSetScaling"); - const std::string class_name = assemble_unique_class_name("DataSet"); - - PYBIND11_NUMPY_DTYPE(typename data_set_type::scaling::factors, feature, lower, upper); - // bind the plssvm::data_set::scaling internal "factors" struct - py::class_(m, class_name_scaling_factors.c_str()) - .def(py::init(), "create a new scaling factor", py::arg("feature"), py::arg("lower"), py::arg("upper")) - .def_readonly("feature", &data_set_type::scaling::factors::feature, "the feature index for which the factors are valid") - .def_readonly("lower", &data_set_type::scaling::factors::lower, "the lower scaling factor") - .def_readonly("upper", &data_set_type::scaling::factors::upper, "the upper scaling factor") - .def("__repr__", [class_name_scaling_factors](const typename data_set_type::scaling::factors &self) { - return fmt::format("", - class_name_scaling_factors, - self.feature, - self.lower, - self.upper); - }); - - // bind the plssvm::data_set internal "scaling" struct - py::class_(m, class_name_scaling.c_str()) - .def(py::init(), "create new scaling factors for the range [lower, upper]", py::arg("lower"), py::arg("upper")) - .def(py::init([](const std::array interval) { - return typename data_set_type::scaling{ interval[0], interval[1] }; - }), - "create new scaling factors for the range [lower, upper]") - .def(py::init(), "read the 
scaling factors from the file") - .def("save", &data_set_type::scaling::save, "save the scaling factors to a file") - .def_readonly("scaling_interval", &data_set_type::scaling::scaling_interval, "the interval to which the data points are scaled") - .def_property_readonly( - "scaling_factors", [](const typename data_set_type::scaling &scaling) { - return vector_to_pyarray(scaling.scaling_factors); - }, - "the scaling factors for each feature") - .def("__repr__", [class_name_scaling](const typename data_set_type::scaling &self) { - return fmt::format("", - class_name_scaling, - self.scaling_interval.first, - self.scaling_interval.second, - self.scaling_factors.size()); - }); - - // bind the data set class - py::class_ py_data_set(m, class_name.c_str()); - // bind constructor taking a data set file - py_data_set.def(py::init([](const std::string &file_name, py::kwargs args) { - // check for valid keys - check_kwargs_for_correctness(args, { "file_format", "scaling" }); - - // call the constructor corresponding to the provided keyword arguments - if (args.contains("file_format") && args.contains("scaling")) { - return data_set_type{ file_name, args["file_format"].cast(), create_scaling_object(args) }; - } else if (args.contains("file_format")) { - return data_set_type{ file_name, args["file_format"].cast() }; - } else if (args.contains("scaling")) { - return data_set_type{ file_name, create_scaling_object(args) }; - } else { - return data_set_type{ file_name }; - } - }), - "create a new data set from the provided file and additional optional parameters"); - // bind constructor taking only data points without labels - py_data_set.def(py::init([](py::array_t data, py::kwargs args) { - // check keyword arguments - check_kwargs_for_correctness(args, { "scaling" }); - - if (args.contains("scaling")) { - return data_set_type{ pyarray_to_matrix(data), create_scaling_object(args) }; - } else { - return data_set_type{ pyarray_to_matrix(data) }; - } - }), - "create a new data set 
without labels given additional optional parameters"); - - if constexpr (!std::is_same_v) { - py_data_set.def(py::init([](py::array_t data, py::array_t labels, py::kwargs args) { - // check keyword arguments - check_kwargs_for_correctness(args, { "scaling" }); - - if (args.contains("scaling")) { - return data_set_type{ pyarray_to_matrix(data), pyarray_to_vector(labels), create_scaling_object(args) }; - } else { - return data_set_type{ pyarray_to_matrix(data), pyarray_to_vector(labels) }; - } - }), - "create a new data set with labels from a numpy array given additional optional parameters"); - } else { - // if the requested label_type is std::string, accept numpy arrays with real_type and convert them to a std::string internally - py_data_set.def(py::init([](py::array_t data, py::array_t labels, py::kwargs args) { - // check keyword arguments - check_kwargs_for_correctness(args, { "scaling" }); - - if (args.contains("scaling")) { - return data_set_type{ pyarray_to_matrix(data), pyarray_to_string_vector(labels), create_scaling_object(args) }; - } else { - return data_set_type{ pyarray_to_matrix(data), pyarray_to_string_vector(labels) }; - } - }), - "create a new data set with labels from a numpy array given additional optional parameters"); - // if the requested label_type is std::string, accept a python list (which can contain py::str) and convert them to a std::string internally - py_data_set.def(py::init([](py::array_t data, const py::list &labels, py::kwargs args) { - // check keyword arguments - check_kwargs_for_correctness(args, { "scaling" }); - - if (args.contains("scaling")) { - return data_set_type{ pyarray_to_matrix(data), pylist_to_string_vector(labels), create_scaling_object(args) }; - } else { - return data_set_type{ pyarray_to_matrix(data), pylist_to_string_vector(labels) }; - } - }), - "create a new data set with labels from a Python list given additional optional parameters"); - } - - py_data_set.def("save", py::overload_cast(&data_set_type::save, 
py::const_), "save the data set to a file using the provided file format type") - .def("save", py::overload_cast(&data_set_type::save, py::const_), "save the data set to a file automatically deriving the file format type from the file extension") - .def("num_data_points", &data_set_type::num_data_points, "the number of data points in the data set") - .def("num_features", &data_set_type::num_features, "the number of features per data point") - .def("data", [](const data_set_type &data) { return matrix_to_pyarray(data.data()); }, "the data saved as 2D vector") - .def("has_labels", &data_set_type::has_labels, "check whether the data set has labels") - .def("labels", [](const data_set_type &self) { - if (!self.has_labels()) { - throw py::attribute_error{ "'DataSet' object has no function 'labels'. Maybe this DataSet was created without labels?" }; - } else { - if constexpr (std::is_same_v) { - return self.labels()->get(); - } else { - return vector_to_pyarray(self.labels()->get()); - } - } }, "the labels") - .def("num_classes", &data_set_type::num_classes, "the number of classes") - .def("classes", [](const data_set_type &self) { - if (!self.has_labels()) { - throw py::attribute_error{ "'DataSet' object has no function 'classes'. Maybe this DataSet was created without labels?" }; - } else { - if constexpr (std::is_same_v) { - return self.classes().value(); - } else { - return vector_to_pyarray(self.classes().value()); - } - } }, "the classes") - .def("is_scaled", &data_set_type::is_scaled, "check whether the original data has been scaled to [lower, upper] bounds") - .def("scaling_factors", [](const data_set_type &self) { - if (!self.is_scaled()) { - throw py::attribute_error{ "'DataSet' object has no function 'scaling_factors'. Maybe this DataSet has not been scaled?" 
}; - } else { - return self.scaling_factors().value(); - } }, py::return_value_policy::reference_internal, "the factors used to scale this data set") - .def("__repr__", [class_name](const data_set_type &self) { - std::string optional_repr{}; - if (self.has_labels()) { - optional_repr += fmt::format(", classes: [{}]", fmt::join(self.classes().value(), ", ")); - } - if (self.is_scaled()) { - optional_repr += fmt::format(", scaling: [{}, {}]", - self.scaling_factors()->get().scaling_interval.first, - self.scaling_factors()->get().scaling_interval.second); - } - return fmt::format("", - class_name, - self.num_data_points(), - self.num_features(), - optional_repr); }); -} - -template -void instantiate_data_set_bindings(py::module_ &m, std::integer_sequence) { - (instantiate_data_set_bindings(m, std::tuple_element_t{}), ...); -} - -template -void instantiate_data_set_bindings(py::module_ &m) { - instantiate_data_set_bindings(m, std::make_integer_sequence>{}); -} - -void init_data_set(py::module_ &m) { - // bind all data_set classes - instantiate_data_set_bindings(m); - - // create aliases - m.attr("DataSetScalingFactors") = m.attr(assemble_unique_class_name("DataSetScalingFactors").c_str()); - m.attr("DataSetScaling") = m.attr(assemble_unique_class_name("DataSetScaling").c_str()); - m.attr("DataSet") = m.attr(assemble_unique_class_name("DataSet").c_str()); -} diff --git a/bindings/Python/data_set/classification_data_set.cpp b/bindings/Python/data_set/classification_data_set.cpp new file mode 100644 index 000000000..89d62ebb3 --- /dev/null +++ b/bindings/Python/data_set/classification_data_set.cpp @@ -0,0 +1,149 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ */ + +#include "plssvm/data_set/classification_data_set.hpp" // plssvm::classification_data_set + +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/data_set/min_max_scaler.hpp" // plssvm::min_max_scaler +#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t +#include "plssvm/file_format_types.hpp" // plssvm::file_format_type +#include "plssvm/matrix.hpp" // plssvm::soa_matrix + +#include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::classification_data_set_wrapper +#include "bindings/Python/type_caster/label_vector_wrapper_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::label_vector_wrapper +#include "bindings/Python/type_caster/matrix_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::matrix +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{create_instance, python_type_name_mapping, vector_to_pyarray} + +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join +#include "pybind11/numpy.h" // py::array_t, py::array +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::pos_only, py::attribute_error +#include "pybind11/pytypes.h" // py::type +#include "pybind11/stl.h" // support for STL types + +#include // std::make_unique +#include // std::optional, std::nullopt +#include // std::string +#include // std::move +#include // std::visit + +namespace py = pybind11; + +void init_classification_data_set(py::module_ &m) { + using plssvm::bindings::python::util::classification_data_set_wrapper; + + py::class_(m, "ClassificationDataSet", "Encapsulate all necessary data that is needed for training or predicting using an C-SVC.") + .def(py::init([](const std::string &filename, const std::optional type, const plssvm::file_format_type format, const std::optional scaler) { + if (type.has_value()) { + if (scaler.has_value()) { + return 
std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), filename, format, scaler.value())); + } else { + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), filename, format)); + } + } else { + if (scaler.has_value()) { + return std::make_unique(plssvm::classification_data_set{ filename, format, scaler.value() }); + } else { + return std::make_unique(plssvm::classification_data_set{ filename, format }); + } + } + }), + "create a new data set from the provided file and additional optional parameters like the used label type", + py::arg("filename"), + py::pos_only(), + py::arg("type") = std::nullopt, + py::arg("format") = plssvm::file_format_type::libsvm, + py::arg("scaler") = std::nullopt) + .def(py::init([](plssvm::soa_matrix data, const std::optional type, const std::optional scaler) { + if (type.has_value()) { + if (scaler.has_value()) { + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(data), scaler.value())); + } else { + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(data))); + } + } else { + if (scaler.has_value()) { + return std::make_unique(plssvm::classification_data_set{ std::move(data), scaler.value() }); + } else { + return std::make_unique(plssvm::classification_data_set{ std::move(data) }); + } + } + }), + "create a new data set from the provided data and additional optional parameters like the used label type", + py::arg("X"), + py::pos_only(), + py::arg("type") = std::nullopt, + py::arg("scaler") = std::nullopt) + .def(py::init([](plssvm::soa_matrix data, plssvm::bindings::python::util::label_vector_wrapper labels, const std::optional scaler) { + return std::visit([&](auto &&labels_vector) { + using label_type = typename plssvm::detail::remove_cvref_t::value_type; + if (scaler.has_value()) { + return std::make_unique(plssvm::classification_data_set(std::move(data), std::move(labels_vector), 
scaler.value())); + } else { + return std::make_unique(plssvm::classification_data_set(std::move(data), std::move(labels_vector))); + } + }, + labels.labels); + }), + "create a new data set from the provided data and labels and additional optional parameters", + py::arg("X"), + py::arg("y"), + py::pos_only(), + py::arg("scaler") = std::nullopt) + .def("save", [](const classification_data_set_wrapper &self, const std::string &filename, const plssvm::file_format_type format) { std::visit([&filename, format](auto &&data) { data.save(filename, format); }, self.data_set); }, "save the data set to a file using the provided file format type", py::arg("filename"), py::pos_only(), py::arg("format") = plssvm::file_format_type::libsvm) + .def("data", [](const classification_data_set_wrapper &self) { return std::visit([](auto &&data) { return py::cast(data.data()); }, self.data_set); }, "the data saved as 2D vector") + .def("has_labels", [](const classification_data_set_wrapper &self) { return std::visit([](auto &&data) { return data.has_labels(); }, self.data_set); }, "check whether the data set has labels") + // clang-format off + .def("labels", [](const classification_data_set_wrapper &self) { + return std::visit([](auto &&data) { + if (!data.has_labels()) { + throw py::attribute_error{ "'ClassificationDataSet' object has no function 'labels'. Maybe this ClassificationDataSet was created without labels?" 
}; + } else { + return plssvm::bindings::python::util::vector_to_pyarray(data.labels()->get()); + } + }, self.data_set); }, "the labels") + // clang-format on + .def("num_data_points", [](const classification_data_set_wrapper &self) { return std::visit([](auto &&data) { return data.num_data_points(); }, self.data_set); }, "the number of data points in the data set") + .def("num_features", [](const classification_data_set_wrapper &self) { return std::visit([](auto &&data) { return data.num_features(); }, self.data_set); }, "the number of features per data point") + .def("is_scaled", [](const classification_data_set_wrapper &self) { return std::visit([](auto &&data) { return data.is_scaled(); }, self.data_set); }, "check whether the original data has been scaled to [lower, upper] bounds") + .def("scaling_factors", [](const classification_data_set_wrapper &self) { return std::visit([](auto &&data) { + if (!data.is_scaled()) { + throw py::attribute_error{ "'ClassificationDataSet' object has no function 'scaling_factors'. Maybe this ClassificationDataSet has not been scaled?" }; + } else { + return data.scaling_factors().value(); + } }, self.data_set); }, py::return_value_policy::reference_internal, "the factors used to scale this data set") + .def("num_classes", [](const classification_data_set_wrapper &self) { return std::visit([](auto &&data) { return data.num_classes(); }, self.data_set); }, "the number of classes") + // clang-format off + .def("classes", [](const classification_data_set_wrapper &self) { + return std::visit([](auto &&data) { + if (!data.has_labels()) { + throw py::attribute_error{ "'ClassificationDataSet' object has no function 'classes'. Maybe this ClassificationDataSet was created without labels?" 
}; + } else { + return plssvm::bindings::python::util::vector_to_pyarray(data.classes().value()); + } + }, self.data_set); }, "the number of classes") + .def("__repr__", [](const classification_data_set_wrapper &self) { + return std::visit([](auto &&data) { + std::string optional_repr{}; + if (data.has_labels()) { + optional_repr += fmt::format(", classes: [{}]", fmt::join(data.classes().value(), ", ")); + } + if (data.is_scaled()) { + optional_repr += fmt::format(", scaling: [{}, {}]", + data.scaling_factors()->get().scaling_interval().first, + data.scaling_factors()->get().scaling_interval().second); + } + using label_type = typename plssvm::detail::remove_cvref_t::label_type; + return fmt::format("", + plssvm::bindings::python::util::python_type_name_mapping(), + data.num_data_points(), + data.num_features(), + optional_repr); + }, self.data_set); }); + // clang-format on +} diff --git a/bindings/Python/data_set/min_max_scaler.cpp b/bindings/Python/data_set/min_max_scaler.cpp new file mode 100644 index 000000000..478cb306f --- /dev/null +++ b/bindings/Python/data_set/min_max_scaler.cpp @@ -0,0 +1,78 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ */ + +#include "plssvm/data_set/min_max_scaler.hpp" // plssvm::min_max_scaler + +#include "plssvm/constants.hpp" // plssvm::real_type + +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::vector_to_pyarray + +#include "fmt/format.h" // fmt::format +#include "pybind11/numpy.h" // py::array +#include "pybind11/pybind11.h" // PYBIND11_NUMPY_DTYPE, py::module_, py::class_, py::init, py::arg +#include "pybind11/pytypes.h" // py::type +#include "pybind11/stl.h" // support for STL types + +#include // std::array +#include // std::size_t +#include // std::optional, std::nullopt +#include // std::string + +namespace py = pybind11; + +void init_min_max_scaler(py::module_ &m) { + PYBIND11_NUMPY_DTYPE(plssvm::min_max_scaler::factors, feature, lower, upper); + + // bind the plssvm::min_max_scaler::factors struct + py::class_(m, "MinMaxScalerFactors", "The calculated or read feature-wise scaling factors.") + .def(py::init(), "create a new scaling factor", py::arg("feature_index"), py::arg("lower"), py::arg("upper")) + .def_readonly("feature_index", &plssvm::min_max_scaler::factors::feature, "the feature index for which the factors are valid") + .def_readonly("lower", &plssvm::min_max_scaler::factors::lower, "the lower scaling factor") + .def_readonly("upper", &plssvm::min_max_scaler::factors::upper, "the upper scaling factor") + .def("__repr__", [](const plssvm::min_max_scaler::factors &self) { + return fmt::format("", + self.feature, + self.lower, + self.upper); + }); + + // bind the plssvm::min_max_scaler class + py::class_(m, "MinMaxScaler", "Implements all necessary data and functions needed for scaling a plssvm::data_set to an user-defined range [lower, upper].") + .def(py::init(), "create new scaling factors for the range [lower, upper]", py::arg("lower"), py::arg("upper")) + .def(py::init([](const std::array interval) { + return plssvm::min_max_scaler{ interval[0], interval[1] }; + }), + "create new scaling factors for the range [lower, upper]") + 
.def(py::init([](const py::tuple interval) { + if (interval.size() != 2) { + throw py::value_error{ fmt::format("MinMaxScaler can only be created from two interval values (lower, upper), but {} were provided!", interval.size()) }; + } + return plssvm::min_max_scaler{ interval[0].cast(), interval[1].cast() }; + }), + "create new scaling factors for the range [lower, upper]") + .def(py::init(), "read the scaling factors from the file") + .def("save", &plssvm::min_max_scaler::save, "save the scaling factors to a file") + .def("scaling_interval", &plssvm::min_max_scaler::scaling_interval, "the interval to which the data points are scaled") + .def("scaling_factors", [](const plssvm::min_max_scaler &self) -> std::optional { + const auto scaling_factors = self.scaling_factors(); + if (scaling_factors.has_value()) { + return plssvm::bindings::python::util::vector_to_pyarray(scaling_factors.value()); + } else { + return std::nullopt; + } }, "the scaling factors for each feature") + .def("__repr__", [](const plssvm::min_max_scaler &self) { + std::string optional_repr{}; + const auto scaling_factors = self.scaling_factors(); + if (scaling_factors.has_value()) { + optional_repr += fmt::format(", #factors: {}", scaling_factors->size()); + } + return fmt::format("", + self.scaling_interval().first, + self.scaling_interval().second, + optional_repr); }); +} diff --git a/bindings/Python/data_set/regression_data_set.cpp b/bindings/Python/data_set/regression_data_set.cpp new file mode 100644 index 000000000..2cc304ede --- /dev/null +++ b/bindings/Python/data_set/regression_data_set.cpp @@ -0,0 +1,137 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ */ + +#include "plssvm/data_set/regression_data_set.hpp" // plssvm::regression_data_set + +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/data_set/min_max_scaler.hpp" // plssvm::min_max_scaler +#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t +#include "plssvm/file_format_types.hpp" // plssvm::file_format_type +#include "plssvm/matrix.hpp" // plssvm::soa_matrix + +#include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_data_set_wrapper +#include "bindings/Python/type_caster/label_vector_wrapper_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::label_vector_wrapper +#include "bindings/Python/type_caster/matrix_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::matrix +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{create_instance, python_type_name_mapping, vector_to_pyarray} + +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join +#include "pybind11/numpy.h" // py::array_t, py::array +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::pos_only, py::object, py::attribute_error +#include "pybind11/pytypes.h" // py::type +#include "pybind11/stl.h" // support for STL types + +#include // std::make_unique +#include // std::optional, std::nullopt +#include // std::string +#include // std::move +#include // std::visit + +namespace py = pybind11; + +void init_regression_data_set(py::module_ &m) { + using plssvm::bindings::python::util::regression_data_set_wrapper; + + py::class_(m, "RegressionDataSet", "Encapsulate all necessary data that is needed for training or predicting using an C-SVR.") + .def(py::init([](const std::string &filename, const std::optional type, const plssvm::file_format_type format, const std::optional scaler) { + if (type.has_value()) { + if (scaler.has_value()) { + return 
std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), filename, format, scaler.value())); + } else { + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), filename, format)); + } + } else { + if (scaler.has_value()) { + return std::make_unique(plssvm::regression_data_set{ filename, format, scaler.value() }); + } else { + return std::make_unique(plssvm::regression_data_set{ filename, format }); + } + } + }), + "create a new data set from the provided file and additional optional parameters", + py::arg("filename"), + py::pos_only(), + py::arg("type") = std::nullopt, + py::arg("format") = plssvm::file_format_type::libsvm, + py::arg("scaler") = std::nullopt) + .def(py::init([](plssvm::soa_matrix data, const std::optional type, const std::optional scaler) { + if (type.has_value()) { + if (scaler.has_value()) { + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(data), scaler.value())); + } else { + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), std::move(data))); + } + } else { + if (scaler.has_value()) { + return std::make_unique(plssvm::regression_data_set{ std::move(data), scaler.value() }); + } else { + return std::make_unique(plssvm::regression_data_set{ std::move(data) }); + } + } + }), + "create a new data set from the provided file and additional optional parameters", + py::arg("X"), + py::pos_only(), + py::arg("type") = std::nullopt, + py::arg("scaler") = std::nullopt) + .def(py::init([](plssvm::soa_matrix data, plssvm::bindings::python::util::label_vector_wrapper labels, const std::optional scaler) { + return std::visit([&](auto &&labels_vector) { + using label_type = typename plssvm::detail::remove_cvref_t::value_type; + if (scaler.has_value()) { + return std::make_unique(plssvm::regression_data_set(std::move(data), std::move(labels_vector), scaler.value())); + } else { + return 
std::make_unique(plssvm::regression_data_set(std::move(data), std::move(labels_vector))); + } + }, + labels.labels); + }), + "create a new data set from the provided file and additional optional parameters", + py::arg("X"), + py::arg("y"), + py::pos_only(), + py::arg("scaler") = std::nullopt) + .def("save", [](const regression_data_set_wrapper &self, const std::string &filename, const plssvm::file_format_type format) { std::visit([&filename, format](auto &&data) { data.save(filename, format); }, self.data_set); }, "save the data set to a file using the provided file format type", py::arg("filename"), py::pos_only(), py::arg("format") = plssvm::file_format_type::libsvm) + .def("data", [](const regression_data_set_wrapper &self) { return std::visit([](auto &&data) { return py::cast(data.data()); }, self.data_set); }, "the data saved as 2D vector") + .def("has_labels", [](const regression_data_set_wrapper &self) { return std::visit([](auto &&data) { return data.has_labels(); }, self.data_set); }, "check whether the data set has labels") + // clang-format off + .def("labels", [](const regression_data_set_wrapper &self) { + return std::visit([](auto &&data) { + if (!data.has_labels()) { + throw py::attribute_error{ "'RegressionDataSet' object has no function 'labels'. Maybe this RegressionDataSet was created without labels?" 
}; + } else { + return plssvm::bindings::python::util::vector_to_pyarray(data.labels()->get()); + } + }, self.data_set); }, "the labels") + // clang-format on + .def("num_data_points", [](const regression_data_set_wrapper &self) { return std::visit([](auto &&data) { return data.num_data_points(); }, self.data_set); }, "the number of data points in the data set") + .def("num_features", [](const regression_data_set_wrapper &self) { return std::visit([](auto &&data) { return data.num_features(); }, self.data_set); }, "the number of features per data point") + .def("is_scaled", [](const regression_data_set_wrapper &self) { return std::visit([](auto &&data) { return data.is_scaled(); }, self.data_set); }, "check whether the original data has been scaled to [lower, upper] bounds") + .def("scaling_factors", [](const regression_data_set_wrapper &self) { return std::visit([](auto &&data) { + if (!data.is_scaled()) { + throw py::attribute_error{ "'RegressionDataSet' object has no function 'scaling_factors'. Maybe this RegressionDataSet has not been scaled?" 
}; + } else { + return data.scaling_factors().value(); + } }, self.data_set); }, py::return_value_policy::reference_internal, "the factors used to scale this data set") + // clang-format off + .def("__repr__", [](const regression_data_set_wrapper &self) { + return std::visit([](auto &&data) { + std::string optional_repr{}; + if (data.is_scaled()) { + optional_repr += fmt::format(", scaling: [{}, {}]", + data.scaling_factors()->get().scaling_interval().first, + data.scaling_factors()->get().scaling_interval().second); + } + using label_type = typename plssvm::detail::remove_cvref_t::label_type; + return fmt::format("", + plssvm::bindings::python::util::python_type_name_mapping(), + data.num_data_points(), + data.num_features(), + optional_repr); + }, self.data_set); }); + // clang-format on +} diff --git a/bindings/Python/data_set/variant_wrapper.hpp b/bindings/Python/data_set/variant_wrapper.hpp new file mode 100644 index 000000000..55f5fb8a5 --- /dev/null +++ b/bindings/Python/data_set/variant_wrapper.hpp @@ -0,0 +1,125 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Variant wrapper structs around PLSSVM classification and regression data sets. Used that we don't have to expose templates to the Python bindings. 
+ */ + +#ifndef PLSSVM_BINDINGS_PYTHON_DATA_SET_WRAPPER_HPP_ +#define PLSSVM_BINDINGS_PYTHON_DATA_SET_WRAPPER_HPP_ +#pragma once + +#include "plssvm/data_set/classification_data_set.hpp" // plssvm::classification_data_set +#include "plssvm/data_set/regression_data_set.hpp" // plssvm::regression_data_set + +#include // fixed-width integers +#include // std::string +#include // std::move +#include // std::variant +#include // std::vector + +namespace plssvm::bindings::python::util { + +/** + * @brief A wrapper struct encapsulating all possible classification data sets. + */ +struct classification_data_set_wrapper { + /// A std::variant containing all possible classification data set label types. + using possible_vector_types = std::variant, // np.bool + std::vector, // np.int8 + std::vector, // np.uint8 + std::vector, // np.int16 + std::vector, // np.uint16 + std::vector, // np.int32 + std::vector, // np.uint32 + std::vector, // np.int64 + std::vector, // np.uint64 + std::vector, // np.float32 + std::vector, // np.float64 + std::vector>; // np.string + + /// A std::variant containing all possible classification data set types. + using possible_data_set_types = std::variant, // np.bool + plssvm::classification_data_set, // np.int8 + plssvm::classification_data_set, // np.uint8 + plssvm::classification_data_set, // np.int16 + plssvm::classification_data_set, // np.uint16 + plssvm::classification_data_set, // np.int32 + plssvm::classification_data_set, // np.uint32 + plssvm::classification_data_set, // np.int64 + plssvm::classification_data_set, // np.uint64 + plssvm::classification_data_set, // np.float32 + plssvm::classification_data_set, // np.float64 + plssvm::classification_data_set>; // np.str + + /** + * @brief Construct a new classification data set by setting the active std::variant member. 
+ * @tparam T the label type of the classification data set + * @param[in] d the classification data set + */ + template + explicit classification_data_set_wrapper(plssvm::classification_data_set d) : + data_set{ std::move(d) } { } + + /** + * @brief Construct a new classification data set using the provided std::variant. + * @param[in] d the classification data set variant + */ + explicit classification_data_set_wrapper(possible_data_set_types d) : + data_set{ std::move(d) } { } + + /// The actual classification data set (active type in the std::variant). + possible_data_set_types data_set; +}; + +/** + * @brief A wrapper struct encapsulating all possible regression data sets. + */ +struct regression_data_set_wrapper { + /// A std::variant containing all possible regression data set label types. + using possible_vector_types = std::variant, // np.int16 + std::vector, // np.uint16 + std::vector, // np.int32 + std::vector, // np.uint32 + std::vector, // np.int64 + std::vector, // np.uint64 + std::vector, // np.float32 + std::vector>; // np.float64 + + /// A std::variant containing all possible regression data set types. + using possible_data_set_types = std::variant, // np.int16 + plssvm::regression_data_set, // np.uint16 + plssvm::regression_data_set, // np.int32 + plssvm::regression_data_set, // np.uint32 + plssvm::regression_data_set, // np.int64 + plssvm::regression_data_set, // np.uint64 + plssvm::regression_data_set, // np.float32 + plssvm::regression_data_set>; // np.float64 + + /** + * @brief Construct a new regression data set by setting the active std::variant member. + * @tparam T the label type of the regression data set + * @param[in] d the regression data set + */ + template + explicit regression_data_set_wrapper(plssvm::regression_data_set d) : + data_set{ std::move(d) } { } + + /** + * @brief Construct a new regression data set using the provided std::variant. 
+ * @param[in] d the regression data set variant + */ + explicit regression_data_set_wrapper(possible_data_set_types d) : + data_set{ std::move(d) } { } + + /// The actual regression data set (active type in the std::variant). + possible_data_set_types data_set; +}; + +} // namespace plssvm::bindings::python::util + +#endif // PLSSVM_BINDINGS_PYTHON_DATA_SET_WRAPPER_HPP_ diff --git a/bindings/Python/detail/tracking/events.cpp b/bindings/Python/detail/tracking/events.cpp index 6c7e2eeac..86f3ae161 100644 --- a/bindings/Python/detail/tracking/events.cpp +++ b/bindings/Python/detail/tracking/events.cpp @@ -25,7 +25,7 @@ void init_events(py::module_ &m) { using event_type = plssvm::detail::tracking::events::event; // bind a single event - py::class_(performance_tracker_module, "Event") + py::class_(performance_tracker_module, "Event", "A class encapsulating a single event: name + timestamp where the event occurred.") .def(py::init(), "construct a new event using a time point and a name") .def_readonly("time_point", &event_type::time_point, "read the time point associated to this event") .def_readonly("name", &event_type::name, "read the name associated to this event") @@ -34,7 +34,7 @@ void init_events(py::module_ &m) { }); // bind the events wrapper - py::class_(performance_tracker_module, "Events") + py::class_(performance_tracker_module, "Events", "A class encapsulating all occurred events.") .def(py::init<>(), "construct an empty events wrapper") .def("add_event", py::overload_cast(&plssvm::detail::tracking::events::add_event), "add a new event") .def("add_event", py::overload_cast(&plssvm::detail::tracking::events::add_event), "add a new event using a time point and a name") diff --git a/bindings/Python/exceptions/exceptions.cpp b/bindings/Python/exceptions/exceptions.cpp index 18cd6b441..71c1f7a92 100644 --- a/bindings/Python/exceptions/exceptions.cpp +++ b/bindings/Python/exceptions/exceptions.cpp @@ -8,7 +8,7 @@ #include "plssvm/exceptions/exceptions.hpp" // 
PLSSVM specific exceptions -#include "bindings/Python/utility.hpp" // register_py_exception +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::register_py_exception #include "pybind11/pybind11.h" // py::module_, py::exception @@ -16,16 +16,18 @@ namespace py = pybind11; void init_exceptions(py::module_ &m, const py::exception &base_exception) { // register all basic PLSSVM exceptions - register_py_exception(m, "InvalidParameterError", base_exception); - register_py_exception(m, "FileReaderError", base_exception); - register_py_exception(m, "DataSetError", base_exception); - register_py_exception(m, "FileNotFoundError", base_exception); - register_py_exception(m, "InvalidFileFormatError", base_exception); - register_py_exception(m, "UnsupportedBackendError", base_exception); - register_py_exception(m, "UnsupportedKernelTypeError", base_exception); - register_py_exception(m, "GPUDevicePtrError", base_exception); - register_py_exception(m, "MatrixError", base_exception); - register_py_exception(m, "KernelLaunchResourcesError", base_exception); - register_py_exception(m, "ClassificationReportError", base_exception); - register_py_exception(m, "EnvironmentError", base_exception); + plssvm::bindings::python::util::register_py_exception(m, "InvalidParameterError", base_exception); + plssvm::bindings::python::util::register_py_exception(m, "FileReaderError", base_exception); + plssvm::bindings::python::util::register_py_exception(m, "DataSetError", base_exception); + plssvm::bindings::python::util::register_py_exception(m, "MinMaxScalerError", base_exception); + plssvm::bindings::python::util::register_py_exception(m, "FileNotFoundError", base_exception); + plssvm::bindings::python::util::register_py_exception(m, "InvalidFileFormatError", base_exception); + plssvm::bindings::python::util::register_py_exception(m, "UnsupportedBackendError", base_exception); + plssvm::bindings::python::util::register_py_exception(m, "UnsupportedKernelTypeError", 
base_exception); + plssvm::bindings::python::util::register_py_exception(m, "GPUDevicePtrError", base_exception); + plssvm::bindings::python::util::register_py_exception(m, "MatrixError", base_exception); + plssvm::bindings::python::util::register_py_exception(m, "KernelLaunchResourcesError", base_exception); + plssvm::bindings::python::util::register_py_exception(m, "ClassificationReportError", base_exception); + plssvm::bindings::python::util::register_py_exception(m, "RegressionReportError", base_exception); + plssvm::bindings::python::util::register_py_exception(m, "EnvironmentError", base_exception); } diff --git a/bindings/Python/file_format_types.cpp b/bindings/Python/file_format_types.cpp index 8b51a0851..4d50efedc 100644 --- a/bindings/Python/file_format_types.cpp +++ b/bindings/Python/file_format_types.cpp @@ -14,7 +14,7 @@ namespace py = pybind11; void init_file_format_types(py::module_ &m) { // bind enum class - py::enum_(m, "FileFormatType") + py::enum_(m, "FileFormatType", "Enum class for all supported file types.") .value("LIBSVM", plssvm::file_format_type::libsvm, "the LIBSVM file format (default); for the file format specification see: https://www.csie.ntu.edu.tw/~cjlin/libsvm/faq.html") .value("ARFF", plssvm::file_format_type::arff, "the ARFF file format; for the file format specification see: https://www.cs.waikato.ac.nz/~ml/weka/arff.html"); } diff --git a/bindings/Python/gamma.cpp b/bindings/Python/gamma.cpp index df710e986..36812224e 100644 --- a/bindings/Python/gamma.cpp +++ b/bindings/Python/gamma.cpp @@ -9,8 +9,9 @@ #include "plssvm/gamma.hpp" #include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/matrix.hpp" // plssvm::aos_matrix -#include "bindings/Python/utility.hpp" // pyarray_to_matrix +#include "bindings/Python/type_caster/matrix_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::matrix #include "pybind11/pybind11.h" // py::module_, py::enum_ #include "pybind11/stl.h" // support for STL types: std::variant 
@@ -19,13 +20,13 @@ namespace py = pybind11; void init_gamma(py::module_ &m) { // bind enum class - py::enum_(m, "GammaCoefficientType") + py::enum_(m, "GammaCoefficientType", "Enum class for all possible gamma coefficient types (can also be a number).") .value("AUTOMATIC", plssvm::gamma_coefficient_type::automatic, "use a dynamic gamma value of 1 / num_features for the kernel functions") .value("SCALE", plssvm::gamma_coefficient_type::scale, "use a dynamic gamma value of 1 / (num_features * data.var()) for the kernel functions"); // bind free functions m.def("get_gamma_string", &plssvm::get_gamma_string, "get the gamma string based on the currently active variant member"); - m.def("calculate_gamma_value", [](const plssvm::gamma_type &gamma, py::array_t data) { - return plssvm::calculate_gamma_value(gamma, pyarray_to_matrix(data)); + m.def("calculate_gamma_value", [](const plssvm::gamma_type &gamma, const plssvm::aos_matrix &data) { + return plssvm::calculate_gamma_value(gamma, data); }); } diff --git a/bindings/Python/kernel_function_types.cpp b/bindings/Python/kernel_function_types.cpp index 9d8865aae..32fee950b 100644 --- a/bindings/Python/kernel_function_types.cpp +++ b/bindings/Python/kernel_function_types.cpp @@ -14,7 +14,7 @@ namespace py = pybind11; void init_kernel_function_types(py::module_ &m) { // bind enum class - py::enum_(m, "KernelFunctionType") + py::enum_(m, "KernelFunctionType", "Enum class for all implemented kernel functions in PLSSVM.") .value("LINEAR", plssvm::kernel_function_type::linear, "linear kernel function: ") .value("POLYNOMIAL", plssvm::kernel_function_type::polynomial, "polynomial kernel function: (gamma * + coef0)^degree") .value("RBF", plssvm::kernel_function_type::rbf, "radial basis function: exp(-gamma * ||u - v||^2)") diff --git a/bindings/Python/kernel_functions.cpp b/bindings/Python/kernel_functions.cpp index 9cb4fc42f..6342513f8 100644 --- a/bindings/Python/kernel_functions.cpp +++ b/bindings/Python/kernel_functions.cpp @@ 
-9,14 +9,15 @@ #include "plssvm/kernel_functions.hpp" // plssvm::kernel_function #include "plssvm/constants.hpp" // plssvm::real_type -#include "plssvm/gamma.hpp" // plssvm::calculate_gamma_value #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/parameter.hpp" // plssvm::parameter +#include "fmt/format.h" // fmt::format #include "pybind11/pybind11.h" // py::module_, py::arg #include "pybind11/stl.h" // support for STL types: std::vector -#include // std::vector +#include // std::holds_alternative +#include // std::vector namespace py = pybind11; @@ -67,8 +68,11 @@ void init_kernel_functions(py::module_ &m) { py::arg("gamma")); m.def( - "kernel_function", [](const std::vector &x, const std::vector &y, plssvm::parameter params) { - // assume params.gamma holds a real_type + "kernel_function", [](const std::vector &x, const std::vector &y, const plssvm::parameter ¶ms) { + // check if params.gamma can be used -> must be a real_type! + if (params.kernel_type != plssvm::kernel_function_type::linear && !std::holds_alternative(params.gamma)) { + throw py::value_error{ fmt::format("In order to call 'kernel_function' the 'gamma' parameter must be a real_type, but is '{}'!", params.gamma) }; + } return plssvm::kernel_function(x, y, params); }, "apply the kernel function defined in the parameter object to two vectors"); diff --git a/bindings/Python/main.cpp b/bindings/Python/main.cpp index 1c1248fb2..616a0dec0 100644 --- a/bindings/Python/main.cpp +++ b/bindings/Python/main.cpp @@ -9,6 +9,7 @@ #include "plssvm/environment.hpp" // plssvm::environment::{initialize, finalize} #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/version/version.hpp" // plssvm::version::version #include "pybind11/pybind11.h" // PYBIND11_MODULE, py::module_, py::exception, py::register_exception_translator #include "pybind11/pytypes.h" // py::set_error @@ -23,6 +24,7 @@ void init_performance_tracker(py::module_ &); void 
init_events(py::module_ &); void init_target_platforms(py::module_ &); void init_solver_types(py::module_ &); +void init_svm_types(py::module_ &); void init_backend_types(py::module_ &); void init_gamma(py::module_ &); void init_classification_types(py::module_ &); @@ -30,11 +32,17 @@ void init_file_format_types(py::module_ &); void init_kernel_function_types(py::module_ &); void init_kernel_functions(py::module_ &); void init_parameter(py::module_ &); -void init_model(py::module_ &); -void init_data_set(py::module_ &); +void init_classification_model(py::module_ &); +void init_regression_model(py::module_ &); +void init_min_max_scaler(py::module_ &); +void init_classification_data_set(py::module_ &); +void init_regression_data_set(py::module_ &); void init_version(py::module_ &); void init_exceptions(py::module_ &, const py::exception &); +void init_regression_report(py::module_ &); void init_csvm(py::module_ &); +void init_csvc(py::module_ &, py::module_ &); +void init_csvr(py::module_ &, py::module_ &); void init_openmp_csvm(py::module_ &, const py::exception &); void init_hpx_csvm(py::module_ &, const py::exception &); void init_stdpar_csvm(py::module_ &, const py::exception &); @@ -43,10 +51,15 @@ void init_hip_csvm(py::module_ &, const py::exception &); void init_opencl_csvm(py::module_ &, const py::exception &); void init_sycl(py::module_ &, const py::exception &); void init_kokkos_csvm(py::module_ &, const py::exception &); -void init_sklearn(py::module_ &); +void init_sklearn_svc(py::module_ &); +void init_sklearn_svr(py::module_ &); PYBIND11_MODULE(plssvm, m) { - m.doc() = "Parallel Least Squares Support Vector Machine"; + m.doc() = "PLSSVM - Parallel Least Squares Support Vector Machine"; + m.attr("__version__") = plssvm::version::version; + + // create a pure-virtual module + py::module_ pure_virtual = m.def_submodule("__pure_virtual"); // automatically initialize the environments plssvm::environment::initialize(); @@ -79,6 +92,7 @@ 
PYBIND11_MODULE(plssvm, m) { init_target_platforms(m); init_solver_types(m); + init_svm_types(m); init_backend_types(m); init_gamma(m); init_classification_types(m); @@ -86,11 +100,17 @@ PYBIND11_MODULE(plssvm, m) { init_kernel_function_types(m); init_kernel_functions(m); init_parameter(m); - init_model(m); - init_data_set(m); + init_classification_model(m); + init_regression_model(m); + init_min_max_scaler(m); + init_classification_data_set(m); + init_regression_data_set(m); init_version(m); init_exceptions(m, base_exception); - init_csvm(m); + init_regression_report(m); + init_csvm(pure_virtual); + init_csvc(m, pure_virtual); + init_csvr(m, pure_virtual); // init bindings for the specific backends ONLY if the backend has been enabled #if defined(PLSSVM_HAS_OPENMP_BACKEND) @@ -118,5 +138,6 @@ PYBIND11_MODULE(plssvm, m) { init_kokkos_csvm(m, base_exception); #endif - init_sklearn(m); + init_sklearn_svc(m); + init_sklearn_svr(m); } diff --git a/bindings/Python/model.cpp b/bindings/Python/model.cpp deleted file mode 100644 index 009d16953..000000000 --- a/bindings/Python/model.cpp +++ /dev/null @@ -1,87 +0,0 @@ -/** - * @author Alexander Van Craen - * @author Marcel Breyer - * @copyright 2018-today The PLSSVM project - All Rights Reserved - * @license This file is part of the PLSSVM project which is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. 
- */ - -#include "plssvm/model.hpp" // plssvm::model - -#include "plssvm/constants.hpp" // plssvm::real_type -#include "plssvm/detail/type_list.hpp" // plssvm::detail::label_type_list -#include "plssvm/matrix.hpp" // plssvm::aos_matrix - -#include "bindings/Python/utility.hpp" // assemble_unique_class_name, vector_to_pyarray, matrix_to_pyarray - -#include "fmt/format.h" // fmt::format -#include "pybind11/pybind11.h" // py::module_, py::class_, py::return_value_policy -#include "pybind11/stl.h" // support for STL types: std::vector - -#include // std::size_t -#include // std::string -#include // std::tuple_element_t, std::tuple_size_v -#include // std::is_same_v -#include // std::integer_sequence, std::make_integer_sequence - -namespace py = pybind11; - -template -void instantiate_model_bindings(py::module_ &m, label_type) { - using model_type = plssvm::model; - - const std::string class_name = assemble_unique_class_name("Model"); - - py::class_(m, class_name.c_str()) - .def(py::init(), "load a previously learned model from a file") - .def("save", &model_type::save, "save the current model to a file") - .def("num_support_vectors", &model_type::num_support_vectors, "the number of support vectors (note: all training points become support vectors for LSSVMs)") - .def("num_features", &model_type::num_features, "the number of features of the support vectors") - .def("get_params", &model_type::get_params, py::return_value_policy::reference_internal, "the SVM parameter used to learn this model") - .def("support_vectors", [](const model_type &self) { return matrix_to_pyarray(self.support_vectors()); }, "the support vectors (note: all training points become support vectors for LSSVMs)") - .def("labels", [](const model_type &self) { - if constexpr (std::is_same_v) { - return self.labels(); - } else { - return vector_to_pyarray(self.labels()); - } }, "the labels") - .def("num_classes", &model_type::num_classes, "the number of classes") - .def("classes", [](const model_type 
&self) { - if constexpr (std::is_same_v) { - return self.classes(); - } else { - return vector_to_pyarray(self.classes()); - } }, "the classes") - .def("weights", []([[maybe_unused]] const model_type &self) { - py::list ret{}; - for (const plssvm::aos_matrix &matr : self.weights()) { - ret.append(matrix_to_pyarray(matr)); - } - return ret; }, "the weights learned for each support vector and class") - .def("rho", [](const model_type &self) { return vector_to_pyarray(self.rho()); }, "the bias value after learning for each class") - .def("get_classification_type", [](const model_type &self) { return self.get_classification_type(); }, "the classification type used to create this model") - .def("__repr__", [class_name](const model_type &self) { return fmt::format("", - class_name, - self.num_support_vectors(), - self.num_features(), - fmt::format("[{}]", fmt::join(self.rho(), ",")), - self.get_classification_type()); }); -} - -template -void instantiate_model_bindings(py::module_ &m, std::integer_sequence) { - (instantiate_model_bindings(m, std::tuple_element_t{}), ...); -} - -template -void instantiate_model_bindings(py::module_ &m) { - instantiate_model_bindings(m, std::make_integer_sequence>{}); -} - -void init_model(py::module_ &m) { - // bind all model classes - instantiate_model_bindings(m); - - // create alias - m.attr("Model") = m.attr(assemble_unique_class_name("Model").c_str()); -} diff --git a/bindings/Python/model/classification_model.cpp b/bindings/Python/model/classification_model.cpp new file mode 100644 index 000000000..b0ebcc45b --- /dev/null +++ b/bindings/Python/model/classification_model.cpp @@ -0,0 +1,85 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ */ + +#include "plssvm/model/classification_model.hpp" // plssvm::classification_model + +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t +#include "plssvm/matrix.hpp" // plssvm::aos_matrix + +#include "bindings/Python/model/variant_wrapper.hpp" // plssvm::bindings::python::util::classification_model_wrapper +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{python_type_name_mapping, create_instance, vector_to_pyarray} + +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::pos_only, py::array, py::list +#include "pybind11/pytypes.h" // py::type +#include "pybind11/stl.h" // support for STL types: std::vector + +#include // std::make_unique +#include // std::optional, std::make_optional, std::nullopt +#include // std::string +#include // std::visit + +namespace py = pybind11; + +void init_classification_model(py::module_ &m) { + using plssvm::bindings::python::util::classification_model_wrapper; + + py::class_(m, "ClassificationModel", "Implements a class encapsulating the result of a call to the C-SVC fit function. 
A model is used to predict the labels of a new data set.") + .def(py::init([](const std::string &filename, const std::optional type) { + if (type.has_value()) { + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), filename)); + } else { + return std::make_unique(plssvm::classification_model{ filename }); + } + }), + "load a previously learned classification model from a file", + py::arg("filename"), + py::pos_only(), + py::arg("type") = std::nullopt) + .def("save", [](const classification_model_wrapper &self, const std::string &filename) { return std::visit([&filename](auto &&model) { model.save(filename); }, self.model); }, "save the current model to a file") + .def("num_support_vectors", [](const classification_model_wrapper &self) { return std::visit([](auto &&model) { return model.num_support_vectors(); }, self.model); }, "the number of support vectors (note: all training points become support vectors for LS-SVMs)") + .def("num_features", [](const classification_model_wrapper &self) { return std::visit([](auto &&model) { return model.num_features(); }, self.model); }, "the number of features of the support vectors") + .def("get_params", [](const classification_model_wrapper &self) { return std::visit([](auto &&model) { return model.get_params(); }, self.model); }, "the C-SVC hyper-parameters used to learn this model") + .def("support_vectors", [](const classification_model_wrapper &self) { return std::visit([](auto &&model) { return py::cast(model.support_vectors()); }, self.model); }, "the support vectors (note: all training points become support vectors for LS-SVMs)") + // clang-format off + .def("labels", [](const classification_model_wrapper &self) { + return std::visit([](auto &&model) -> std::optional { + if (model.labels().has_value()) { + return std::make_optional(plssvm::bindings::python::util::vector_to_pyarray(model.labels()->get())); + } else { + return std::nullopt; + } + }, self.model); }, "the labels") + 
.def("weights", [](const classification_model_wrapper &self) { + return std::visit([](auto &&model) { + py::list ret{}; + for (const plssvm::aos_matrix &matr : model.weights()) { + ret.append(py::cast(matr)); + } + return ret; + }, self.model); }, "the weights learned for each support vector and class") + // clang-format on + .def("rho", [](const classification_model_wrapper &self) { return std::visit([](auto &&model) { return plssvm::bindings::python::util::vector_to_pyarray(model.rho()); }, self.model); }, "the bias value after learning for each class") + .def("num_classes", [](const classification_model_wrapper &self) { return std::visit([](auto &&model) { return model.num_classes(); }, self.model); }, "the number of classes") + .def("classes", [](const classification_model_wrapper &self) { return std::visit([](auto &&model) { return plssvm::bindings::python::util::vector_to_pyarray(model.classes()); }, self.model); }, "the classes") + .def("get_classification_type", [](const classification_model_wrapper &self) { return std::visit([](auto &&model) { return model.get_classification_type(); }, self.model); }, "the classification type used to create this model") + // clang-format off + .def("__repr__", [](const classification_model_wrapper &self) { + return std::visit([](auto &&model) { + using label_type = typename plssvm::detail::remove_cvref_t::label_type; + return fmt::format("", + plssvm::bindings::python::util::python_type_name_mapping(), + model.num_support_vectors(), + model.num_features(), + fmt::join(model.rho(), ","), + model.get_classification_type()); + }, self.model); }); + // clang-format on +} diff --git a/bindings/Python/model/regression_model.cpp b/bindings/Python/model/regression_model.cpp new file mode 100644 index 000000000..52989cdfd --- /dev/null +++ b/bindings/Python/model/regression_model.cpp @@ -0,0 +1,81 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * 
@license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "plssvm/model/regression_model.hpp" // plssvm::regression_model + +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t +#include "plssvm/matrix.hpp" // plssvm::aos_matrix + +#include "bindings/Python/model/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_model_wrapper +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{python_type_name_mapping, create_instance, vector_to_pyarray} + +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::pos_only, py::array, py::list +#include "pybind11/pytypes.h" // py::type +#include "pybind11/stl.h" // support for STL types: std::vector + +#include // std::make_unique +#include // std::optional, std::make_optional, std::nullopt +#include // std::string +#include // std::visit + +namespace py = pybind11; + +void init_regression_model(py::module_ &m) { + using plssvm::bindings::python::util::regression_model_wrapper; + + py::class_(m, "RegressionModel", "Implements a class encapsulating the result of a call to the C-SVR fit function. 
A model is used to predict the labels of a new data set.") + .def(py::init([](const std::string &filename, const std::optional type) { + if (type.has_value()) { + return std::make_unique(plssvm::bindings::python::util::create_instance(type.value(), filename)); + } else { + return std::make_unique(plssvm::regression_model{ filename }); + } + }), + "load a previously learned regression model from a file", + py::arg("filename"), + py::pos_only(), + py::arg("type") = std::nullopt) + .def("save", [](const regression_model_wrapper &self, const std::string &filename) { return std::visit([&filename](auto &&model) { model.save(filename); }, self.model); }, "save the current model to a file") + .def("num_support_vectors", [](const regression_model_wrapper &self) { return std::visit([](auto &&model) { return model.num_support_vectors(); }, self.model); }, "the number of support vectors (note: all training points become support vectors for LS-SVMs)") + .def("num_features", [](const regression_model_wrapper &self) { return std::visit([](auto &&model) { return model.num_features(); }, self.model); }, "the number of features of the support vectors") + .def("get_params", [](const regression_model_wrapper &self) { return std::visit([](auto &&model) { return model.get_params(); }, self.model); }, "the C-SVR hyper-parameters used to learn this model") + .def("support_vectors", [](const regression_model_wrapper &self) { return std::visit([](auto &&model) { return py::cast(model.support_vectors()); }, self.model); }, "the support vectors (note: all training points become support vectors for LS-SVMs)") + // clang-format off + .def("labels", [](const regression_model_wrapper &self) { + return std::visit([](auto &&model) -> std::optional { + if (model.labels().has_value()) { + return std::make_optional(plssvm::bindings::python::util::vector_to_pyarray(model.labels()->get())); + } else { + return std::nullopt; + } + }, self.model); }, "the labels") + .def("weights", [](const 
regression_model_wrapper &self) { + return std::visit([](auto &&model) { + py::list ret{}; + for (const plssvm::aos_matrix &matr : model.weights()) { + ret.append(py::cast(matr)); + } + return ret; }, + self.model); }, "the weights learned for each support vector") + // clang-format on + .def("rho", [](const regression_model_wrapper &self) { return std::visit([](auto &&model) { return plssvm::bindings::python::util::vector_to_pyarray(model.rho()); }, self.model); }, "the bias value after learning") + // clang-format off + .def("__repr__", [](const regression_model_wrapper &self) { + return std::visit([](auto &&model) { + using label_type = typename plssvm::detail::remove_cvref_t::label_type; + return fmt::format("", + plssvm::bindings::python::util::python_type_name_mapping(), + model.num_support_vectors(), + model.num_features(), + fmt::join(model.rho(), ",")); + }, self.model); }); + // clang-format on +} diff --git a/bindings/Python/model/variant_wrapper.hpp b/bindings/Python/model/variant_wrapper.hpp new file mode 100644 index 000000000..ab5a063a5 --- /dev/null +++ b/bindings/Python/model/variant_wrapper.hpp @@ -0,0 +1,100 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Variant wrapper structs around PLSSVM classification and regression models. Used that we don't have to expose templates to the Python bindings. 
+ */ + +#ifndef PLSSVM_BINDINGS_PYTHON_MODEL_WRAPPER_HPP_ +#define PLSSVM_BINDINGS_PYTHON_MODEL_WRAPPER_HPP_ +#pragma once + +#include "plssvm/model/classification_model.hpp" // plssvm::classification_model +#include "plssvm/model/regression_model.hpp" // plssvm::regression_model + +#include // fixed-width integers +#include // std::string +#include // std::move +#include // std::variant + +namespace plssvm::bindings::python::util { + +/** + * @brief A wrapper struct encapsulating all possible classification models. + */ +struct classification_model_wrapper { + /// A std::variant containing all possible classification model types. + using possible_model_types = std::variant, // np.bool + plssvm::classification_model, // np.int8 + plssvm::classification_model, // np.uint8 + plssvm::classification_model, // np.int16 + plssvm::classification_model, // np.uint16 + plssvm::classification_model, // np.int32 + plssvm::classification_model, // np.uint32 + plssvm::classification_model, // np.int64 + plssvm::classification_model, // np.uint64 + plssvm::classification_model, // np.float32 + plssvm::classification_model, // np.float64 + plssvm::classification_model>; // np.str + + /** + * @brief Construct a new classification model by setting the active std::variant member. + * @tparam T the label type of the classification model + * @param[in] m the classification model + */ + template + explicit classification_model_wrapper(plssvm::classification_model m) : + model{ std::move(m) } { } + + /** + * @brief Construct a new classification model using the provided std::variant. + * @param[in] m the classification model variant + */ + explicit classification_model_wrapper(possible_model_types m) : + model{ std::move(m) } { } + + /// The actual classification model (active type in the std::variant). + possible_model_types model; +}; + +/** + * @brief A wrapper struct encapsulating all possible regression models. 
+ */ +struct regression_model_wrapper { + /// A std::variant containing all possible regression model types. + using possible_model_types = std::variant, // np.int16 + plssvm::regression_model, // np.uint16 + plssvm::regression_model, // np.int32 + plssvm::regression_model, // np.uint32 + plssvm::regression_model, // np.int64 + plssvm::regression_model, // np.uint64 + plssvm::regression_model, // np.float32 + plssvm::regression_model>; // np.float64 + + /** + * @brief Construct a new regression model by setting the active std::variant member. + * @tparam T the label type of the regression model + * @param[in] m the regression model + */ + template + explicit regression_model_wrapper(plssvm::regression_model m) : + model{ std::move(m) } { } + + /** + * @brief Construct a new regression model using the provided std::variant. + * @param[in] m the regression model variant + */ + explicit regression_model_wrapper(possible_model_types m) : + model{ std::move(m) } { } + + /// The actual regression model (active type in the std::variant). 
+ possible_model_types model; +}; + +} // namespace plssvm::bindings::python::util + +#endif // PLSSVM_BINDINGS_PYTHON_MODEL_WRAPPER_HPP_ diff --git a/bindings/Python/parameter.cpp b/bindings/Python/parameter.cpp index 4d0cffde5..6868f18d6 100644 --- a/bindings/Python/parameter.cpp +++ b/bindings/Python/parameter.cpp @@ -12,7 +12,7 @@ #include "plssvm/gamma.hpp" // plssvm::gamma_type #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type -#include "bindings/Python/utility.hpp" +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter} #include "fmt/format.h" // fmt::format #include "pybind11/operators.h" // support for operators @@ -23,22 +23,22 @@ namespace py = pybind11; void init_parameter(py::module_ &m) { // bind parameter class - py::class_(m, "Parameter") - .def(py::init<>()) - .def(py::init()) + py::class_(m, "Parameter", "A class for encapsulating all important C-SVM hyper-parameters.") + .def(py::init<>(), "default construct all hyper-parameters") + .def(py::init(), "create a new Parameter object providing all hyper-parameters explicitly") .def(py::init([](const py::kwargs &args) { // check for valid keys - check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); // if one of the value named parameter is provided, set the respective value - return convert_kwargs_to_parameter(args); + return plssvm::bindings::python::util::convert_kwargs_to_parameter(args); }), - "create a new SVM parameter object") + "create a new Parameter object with the optionally provided hyper-parameter values") .def_property( "kernel_type", [](const plssvm::parameter &self) { return self.kernel_type; }, [](plssvm::parameter &self, const plssvm::kernel_function_type kernel_type) { self.kernel_type = kernel_type; }, 
py::return_value_policy::reference, - "change the used kernel function: linear, polynomial, and rbf") + "change the used kernel function: linear, polynomial, rbf, sigmoid, laplacian, or chi_squared") .def_property( "degree", [](const plssvm::parameter &self) { return self.degree; }, @@ -50,20 +50,20 @@ void init_parameter(py::module_ &m) { [](const plssvm::parameter &self) { return self.gamma; }, [](plssvm::parameter &self, const plssvm::gamma_type &gamma) { self.gamma = gamma; }, py::return_value_policy::reference, - "change the gamma parameter for the polynomial and rbf kernel functions") + "change the gamma parameter for all kernel functions except the linear one") .def_property( "coef0", [](const plssvm::parameter &self) { return self.coef0; }, [](plssvm::parameter &self, const plssvm::real_type coef0) { self.coef0 = coef0; }, py::return_value_policy::reference, - "change the coef0 parameter for the polynomial kernel function") + "change the coef0 parameter for the polynomial and sigmoid kernel functions") .def_property( "cost", [](const plssvm::parameter &self) { return self.cost; }, [](plssvm::parameter &self, const plssvm::real_type cost) { self.cost = cost; }, py::return_value_policy::reference, - "change the cost parameter for the CSVM") - .def("equivalent", &plssvm::parameter::equivalent, "check whether two parameter objects are equivalent, i.e., the SVM parameter important for the current 'kernel_type' are the same") + "change the cost parameter for the C-SVM") + .def("equivalent", &plssvm::parameter::equivalent, "check whether two parameter objects are equivalent, i.e., the SVM hyper-parameters important for the current 'kernel_type' are the same") .def(py::self == py::self, "check whether two parameter objects are identical") .def(py::self != py::self, "check whether two parameter objects are different") .def("__repr__", [](const plssvm::parameter &self) { @@ -76,5 +76,5 @@ void init_parameter(py::module_ &m) { }); // bind free functions - 
m.def("equivalent", &plssvm::equivalent, "check whether two parameter objects are equivalent, i.e., the SVM parameter important for the current 'kernel_type' are the same"); + m.def("equivalent", &plssvm::equivalent, "check whether two parameter objects are equivalent, i.e., the SVM hyper-parameters important for the current 'kernel_type' are the same"); } diff --git a/bindings/Python/regression_report.cpp b/bindings/Python/regression_report.cpp new file mode 100644 index 000000000..fa4933a25 --- /dev/null +++ b/bindings/Python/regression_report.cpp @@ -0,0 +1,57 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "plssvm/regression_report.hpp" // plssvm::regression_report + +#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t + +#include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_data_set_wrapper +#include "bindings/Python/type_caster/label_vector_wrapper_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::label_vector_wrapper + +#include "fmt/format.h" // fmt::format +#include "pybind11/pybind11.h" // py::module_, py::init, py::arg, py::pos_only, py::value_error +#include "pybind11/pytypes.h" // py::object +#include "pybind11/stl.h" // support for STL types + +#include // std::string +#include // std::visit, std::get + +namespace py = pybind11; + +void init_regression_report(py::module_ &m) { + // bind regression_report class + m.def("regression_report", [](plssvm::bindings::python::util::label_vector_wrapper y_true, plssvm::bindings::python::util::label_vector_wrapper y_pred, const bool force_finite, const bool output_dict) -> py::object { + using 
plssvm::bindings::python::util::regression_data_set_wrapper; + + // check that the data types are equal + if (!y_true.dtype.equal(y_pred.dtype)) { + throw py::value_error{ fmt::format(R"(The type of the correct labels "{}" differs from the type of the predicted labels "{}"!)", y_true.dtype.attr("name").cast(), y_pred.dtype.attr("name").cast()) }; + } + + return std::visit([&](auto &&correct_label) -> py::object { + using vector_type = plssvm::detail::remove_cvref_t; + const plssvm::regression_report report{ correct_label, std::get(y_pred.labels), plssvm::regression_report::force_finite = force_finite }; + + if (output_dict) { + // get the metrics + const plssvm::regression_report::metric metrics = report.loss(); + + // fill the Python dictionary + py::dict dict{}; + dict["explained_variance_score"] = metrics.explained_variance_score; + dict["mean_absolute_error"] = metrics.mean_absolute_error; + dict["mean_squared_error"] = metrics.mean_squared_error; + dict["r2_score"] = metrics.r2_score; + dict["squared_correlation_coefficient"] = metrics.squared_correlation_coefficient; + return dict; + } else { + return py::str(fmt::format("{}", report)); + } + }, + y_true.labels); }, "create a new regression report by calculating all metrics between the correct and predicted labels", py::arg("y_true"), py::arg("y_pred"), py::pos_only(), py::arg("force_finite") = true, py::arg("output_dict") = false); +} diff --git a/bindings/Python/sklearn.cpp b/bindings/Python/sklearn.cpp deleted file mode 100644 index e94e05cd8..000000000 --- a/bindings/Python/sklearn.cpp +++ /dev/null @@ -1,491 +0,0 @@ -/** - * @author Alexander Van Craen - * @author Marcel Breyer - * @copyright 2018-today The PLSSVM project - All Rights Reserved - * @license This file is part of the PLSSVM project which is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. 
- */ - -#include "plssvm/core.hpp" - -#include "bindings/Python/utility.hpp" // check_kwargs_for_correctness, assemble_unique_class_name, pyarray_to_vector, pyarray_to_matrix - -#include "fmt/format.h" // fmt::format -#include "pybind11/numpy.h" // support for STL types -#include "pybind11/operators.h" // support for operators -#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::return_value_policy, py::self -#include "pybind11/stl.h" // support for STL types - -#include // std::fill -#include // std::size_t -#include // std::int32_t -#include // std::map -#include // std::unique_ptr, std::make_unique -#include // std::optional, std::nullopt -#include // std::string -#include // std::move -#include // std::holds_alternative -#include // std::vector - -namespace py = pybind11; - -// TODO: implement missing functionality (as far es possible) - -// dummy -struct svc { - // the types - using real_type = plssvm::real_type; - using label_type = PLSSVM_PYTHON_BINDINGS_PREFERRED_LABEL_TYPE; - using data_set_type = plssvm::data_set; - using model_type = plssvm::model; - - std::optional epsilon{}; - std::optional max_iter{}; - plssvm::classification_type classification{ plssvm::classification_type::oaa }; - - std::unique_ptr svm_{ plssvm::make_csvm() }; - std::unique_ptr data_{}; - std::unique_ptr model_{}; -}; - -void parse_provided_params(svc &self, const py::kwargs &args) { - // check keyword arguments - check_kwargs_for_correctness(args, { "C", "kernel", "degree", "gamma", "coef0", "shrinking", "probability", "tol", "cache_size", "class_weight", "verbose", "max_iter", "decision_function_shape", "break_ties", "random_state", "classification" }); - - if (args.contains("C")) { - self.svm_->set_params(plssvm::cost = args["C"].cast()); - } - if (args.contains("kernel")) { - const auto kernel_str = args["kernel"].cast(); - plssvm::kernel_function_type kernel{}; - if (kernel_str == "linear") { - kernel = plssvm::kernel_function_type::linear; - } 
else if (kernel_str == "poly") { - kernel = plssvm::kernel_function_type::polynomial; - } else if (kernel_str == "rbf") { - kernel = plssvm::kernel_function_type::rbf; - } else if (kernel_str == "sigmoid") { - kernel = plssvm::kernel_function_type::sigmoid; - } else if (kernel_str == "laplacian") { - kernel = plssvm::kernel_function_type::laplacian; - } else if (kernel_str == "chi_squared") { - kernel = plssvm::kernel_function_type::chi_squared; - } else if (kernel_str == "precomputed") { - throw py::attribute_error{ R"(The "kernel = 'precomputed'" parameter for a call to the 'SVC' constructor is not implemented yet!)" }; - } else { - throw py::value_error{ fmt::format("'{}' is not in list", kernel_str) }; - } - self.svm_->set_params(plssvm::kernel_type = kernel); - } else { - // sklearn default kernel is the rbf kernel - self.svm_->set_params(plssvm::kernel_type = plssvm::kernel_function_type::rbf); - } - if (args.contains("degree")) { - self.svm_->set_params(plssvm::degree = args["degree"].cast()); - } - if (args.contains("gamma")) { - const plssvm::gamma_type gamma = convert_gamma_kwarg_to_variant(args); - if (std::holds_alternative(gamma)) { - self.svm_->set_params(plssvm::gamma = std::get(gamma)); - } else { - self.svm_->set_params(plssvm::gamma = std::get(gamma)); - } - } - if (args.contains("coef0")) { - self.svm_->set_params(plssvm::coef0 = args["coef0"].cast()); - } - if (args.contains("shrinking")) { - throw py::attribute_error{ "The 'shrinking' parameter for a call to the 'SVC' constructor is not implemented yet!" }; - } - if (args.contains("probability")) { - throw py::attribute_error{ "The 'probability' parameter for a call to the 'SVC' constructor is not implemented yet!" }; - } - if (args.contains("tol")) { - self.epsilon = args["tol"].cast(); - } - if (args.contains("cache_size")) { - throw py::attribute_error{ "The 'cache_size' parameter for a call to the 'SVC' constructor is not implemented yet!" 
}; - } - if (args.contains("class_weight")) { - throw py::attribute_error{ "The 'class_weight' parameter for a call to the 'SVC' constructor is not implemented yet!" }; - } - if (args.contains("verbose")) { - if (args["verbose"].cast()) { - if (plssvm::verbosity == plssvm::verbosity_level::quiet) { - // if current verbosity is quiet, override with full verbosity, since 'verbose=TRUE' should never result in no output - plssvm::verbosity = plssvm::verbosity_level::full; - } - // otherwise: use currently active verbosity level - } else { - plssvm::verbosity = plssvm::verbosity_level::quiet; - } - } else { - // sklearn default is quiet - plssvm::verbosity = plssvm::verbosity_level::quiet; - } - if (args.contains("max_iter")) { - const auto max_iter = args["max_iter"].cast(); - if (max_iter > 0) { - // use provided value - self.max_iter = static_cast(max_iter); - } else if (max_iter == -1) { - // default behavior in PLSSVM -> do nothing - } else { - // invalid max_iter provided - throw py::value_error{ fmt::format("max_iter must either be greater than zero or -1, got {}!", max_iter) }; - } - } - if (args.contains("decision_function_shape")) { - const std::string &dfs = args["decision_function_shape"].cast(); - if (dfs == "ovo") { - self.classification = plssvm::classification_type::oao; - } else if (dfs == "ovr") { - self.classification = plssvm::classification_type::oaa; - } else { - throw py::value_error{ fmt::format("decision_function_shape must be either 'ovr' or 'ovo', got {}.", dfs) }; - } - } - if (args.contains("break_ties")) { - throw py::attribute_error{ "The 'break_ties' parameter for a call to the 'SVC' constructor is not implemented yet!" }; - } - if (args.contains("random_state")) { - throw py::attribute_error{ "The 'random_state' parameter for a call to the 'SVC' constructor is not implemented yet!" 
}; - } -} - -void fit(svc &self) { - // perform sanity checks - if (self.svm_->get_params().cost <= plssvm::real_type{ 0.0 }) { - throw py::value_error{ "C <= 0" }; - } - if (self.svm_->get_params().degree < 0) { - throw py::value_error{ "degree of polynomial kernel < 0" }; - } - if (self.epsilon.has_value() && self.epsilon.value() <= plssvm::real_type{ 0.0 }) { - throw py::value_error{ "eps <= 0" }; - } - - // fit the model using potentially provided keyword arguments - if (self.epsilon.has_value() && self.max_iter.has_value()) { - self.model_ = std::make_unique(self.svm_->fit(*self.data_, - plssvm::classification = self.classification, - plssvm::epsilon = self.epsilon.value(), - plssvm::max_iter = self.max_iter.value())); - } else if (self.epsilon.has_value()) { - self.model_ = std::make_unique(self.svm_->fit(*self.data_, - plssvm::classification = self.classification, - plssvm::epsilon = self.epsilon.value())); - } else if (self.max_iter.has_value()) { - self.model_ = std::make_unique(self.svm_->fit(*self.data_, - plssvm::classification = self.classification, - plssvm::max_iter = self.max_iter.value())); - } else { - self.model_ = std::make_unique(self.svm_->fit(*self.data_, - plssvm::classification = self.classification)); - } -} - -template -[[nodiscard]] std::vector calculate_sv_indices_per_class(const svc &self) { - std::map> indices_per_class{}; - // init index-map map - for (const typename svc::label_type &label : self.model_->classes()) { - indices_per_class.insert({ label, std::vector{} }); - } - // sort the indices into the respective bucket based on their associated class - for (std::size_t idx = 0; idx < self.model_->num_support_vectors(); ++idx) { - indices_per_class[self.model_->labels()[idx]].push_back(static_cast(idx)); - } - // convert map values to vector - std::vector support{}; - support.reserve(self.model_->num_support_vectors()); - for (const auto &[label, indices] : indices_per_class) { - support.insert(support.cend(), indices.cbegin(), 
indices.cend()); - } - return support; -} - -void init_sklearn(py::module_ &m) { - // documentation based on sklearn.svm.SVC documentation - py::class_ py_svc(m, "SVC"); - py_svc.def(py::init([](const py::kwargs &args) { - // to silence constructor messages - if (args.contains("verbose")) { - if (args["verbose"].cast()) { - if (plssvm::verbosity == plssvm::verbosity_level::quiet) { - // if current verbosity is quiet, override with full verbosity, since 'verbose=TRUE' should never result in no output - plssvm::verbosity = plssvm::verbosity_level::full; - } - // otherwise: use currently active verbosity level - } else { - plssvm::verbosity = plssvm::verbosity_level::quiet; - } - } else { - // sklearn default is quiet - plssvm::verbosity = plssvm::verbosity_level::quiet; - } - - // create SVC class - auto self = std::make_unique(); - parse_provided_params(*self, args); - return self; - }), - "Construct a new SVM classifier."); - - //*************************************************************************************************************************************// - // ATTRIBUTES // - //*************************************************************************************************************************************// - py_svc.def_property_readonly("class_weight_", [](const svc &self) { - if (self.model_ == nullptr) { - throw py::attribute_error{ "'SVC' object has no attribute 'class_weight_'" }; - } else { - // note: constant zero since the class_weight parameter is currently not supported - const auto size = static_cast(self.model_->num_classes()); - py::array_t py_array(size); - const py::buffer_info buffer = py_array.request(); - auto ptr = static_cast(buffer.ptr); - std::fill(ptr, ptr + size, plssvm::real_type{ 1.0 }); - return py_array; - } - }) - .def_property_readonly("classes_", [](const svc &self) { - if (self.model_ == nullptr) { - throw py::attribute_error{ "'SVC' object has no attribute 'classes_'" }; - } else { - return 
vector_to_pyarray(self.data_->classes().value()); - } }, "The classes labels. ndarray of shape (n_classes,)") - .def_property_readonly("coef_", [](const svc &) { throw py::attribute_error{ "'SVC' object has no attribute 'coef_' (not implemented)" }; }) - .def_property_readonly("dual_coef_", [](const svc &) { throw py::attribute_error{ "'SVC' object has no attribute 'dual_coef_' (not implemented)" }; }) - .def_property_readonly("fit_status_", [](const svc &self) { - if (self.model_ == nullptr) { - throw py::attribute_error{ "'SVC' object has no attribute 'fit_status_'" }; - } else { - return 0; - } }, "0 if correctly fitted, 1 otherwise (will raise exception). int") - .def_property_readonly("intercept_", [](const svc &) { throw py::attribute_error{ "'SVC' object has no attribute 'intercept_' (not implemented)" }; }) - .def_property_readonly("n_features_in_", [](const svc &self) { - if (self.model_ == nullptr) { - throw py::attribute_error{ "'SVC' object has no attribute 'n_features_in_'" }; - } else { - return static_cast(self.data_->num_features()); - } }, "Number of features seen during fit. int") - .def_property_readonly("feature_names_in_", [](const svc &) { throw py::attribute_error{ "'SVC' object has no attribute 'feature_names_in_' (not implemented)" }; }) - .def_property_readonly("n_iter_", [](const svc &self) { - if (self.model_ == nullptr) { - throw py::attribute_error{ "'SVC' object has no attribute 'support_'" }; - } else { - return vector_to_pyarray(self.model_->num_iters().value()); - } }) - .def_property_readonly("support_", [](const svc &self) { - if (self.model_ == nullptr) { - throw py::attribute_error{ "'SVC' object has no attribute 'support_'" }; - } else { - return vector_to_pyarray(calculate_sv_indices_per_class(self)); - } }, "Indices of support vectors. 
ndarray of shape (n_SV)") - .def_property_readonly("support_vectors_", [](const svc &self) { - if (self.model_ == nullptr) { - throw py::attribute_error{ "'SVC' object has no attribute 'support_vectors_'" }; - } else { - // get the sorted indices - const std::vector support = calculate_sv_indices_per_class(self); - // convert support vectors matrix to 2d vector - std::vector> sv = self.model_->support_vectors().to_2D_vector(); - - // sort support vectors by their class - std::vector> sorted_sv{}; - sorted_sv.reserve(sv.size()); - for (const int idx : support) { - sorted_sv.push_back(std::move(sv[idx])); - } - - // convert 2D vector back to plssvm::matrix - return matrix_to_pyarray(plssvm::aos_matrix{ std::move(sorted_sv) }); - } }, "Support vectors. ndarray of shape (n_SV, n_features)") - .def_property_readonly("n_support_", [](const svc &self) { - if (self.model_ == nullptr) { - throw py::attribute_error{ "'SVC' object has no attribute 'n_support_'" }; - } else { - std::map occurrences{}; - // init count map - for (const typename svc::label_type &label : self.model_->classes()) { - occurrences.insert({ label, std::int32_t{ 0 } }); - } - // count occurrences - for (const typename svc::label_type &label : self.model_->labels()) { - ++occurrences[label]; - } - // convert map values to vector - std::vector n_support{}; - n_support.reserve(occurrences.size()); - for (const auto &[label, n_sv] : occurrences) { - n_support.push_back(n_sv); - } - // convert to Numpy array - return vector_to_pyarray(n_support); - } }, "Number of support vectors for each class. 
ndarray of shape (n_classes,), dtype=int32") - .def_property_readonly("probA_", [](const svc &) { throw py::attribute_error{ "'SVC' object has no attribute 'probA_' (not implemented)" }; }) - .def_property_readonly("probB_", [](const svc &) { throw py::attribute_error{ "'SVC' object has no attribute 'probB_' (not implemented)" }; }) - .def_property_readonly("shape_fit_", [](const svc &self) { - if (self.model_ == nullptr) { - throw py::attribute_error{ "'SVC' object has no attribute 'shape_fit_'" }; - } else { - return std::make_tuple(static_cast(self.data_->num_data_points()), static_cast(self.data_->num_features())); - } }, "Array dimensions of training vector X. tuple of int of shape (n_dimensions_of_X,)"); - - //*************************************************************************************************************************************// - // METHODS // - //*************************************************************************************************************************************// - py_svc.def("decision_function", [](const svc &, py::array_t) { - // TODO: predict_values?! - throw py::attribute_error{ "'SVC' object has no function 'decision_function' (not implemented)" }; - }); -#if !defined(PLSSVM_PYTHON_BINDINGS_LABEL_TYPE_IS_STRING) - py_svc.def( - "fit", [](svc &self, py::array_t data, py::array_t labels, std::optional> sample_weight) -> svc & { - if (sample_weight.has_value()) { - throw py::attribute_error{ "The 'sample_weight' parameter for a call to 'fit' is not implemented yet!" 
}; - } - - // fit the model using potentially provided keyword arguments - self.data_ = std::make_unique(pyarray_to_matrix(data), pyarray_to_vector(labels)); - fit(self); - return self; - }, - "Fit the SVM model according to the given training data.", - py::arg("X"), - py::arg("y"), - py::pos_only(), - py::arg("sample_weight") = std::nullopt, - py::return_value_policy::reference); -#else - py_svc.def( - "fit", [](svc &self, py::array_t data, py::array_t labels, const std::optional> &sample_weight) -> svc & { - if (sample_weight.has_value()) { - throw py::attribute_error{ "The 'sample_weight' parameter for a call to 'fit' is not implemented yet!" }; - } - - // fit the model using potentially provided keyword arguments - self.data_ = std::make_unique(pyarray_to_matrix(data), pyarray_to_string_vector(labels)); - fit(self); - return self; - }, - "Fit the SVM model according to the given training data.", - py::arg("X"), - py::arg("y"), - py::pos_only(), - py::arg("sample_weight") = std::nullopt, - py::return_value_policy::reference) - .def("fit", [](svc &self, py::array_t data, const py::list &labels, const std::optional> &sample_weight) -> svc & { - if (sample_weight.has_value()) { - throw py::attribute_error{ "The 'sample_weight' parameter for a call to 'fit' is not implemented yet!" 
}; - } - - // fit the model using potentially provided keyword arguments - self.data_ = std::make_unique(pyarray_to_matrix(data), pylist_to_string_vector(labels)); - fit(self); - return self; }, "Fit the SVM model according to the given training data.", py::arg("X"), py::arg("y"), py::pos_only(), py::arg("sample_weight") = std::nullopt, py::return_value_policy::reference); -#endif - py_svc.def( - "get_params", [](const svc &self, const bool) { - const plssvm::parameter params = self.svm_->get_params(); - - // fill a Python dictionary with the supported keys and values - py::dict py_params; - py_params["C"] = params.cost; - py_params["break_ties"] = false; - py_params["cache_size"] = 0; - py_params["class_weight"] = py::none(); - py_params["coef0"] = params.coef0; - py_params["decision_function_shape"] = self.classification == plssvm::classification_type::oaa ? "ovr" : "ovo"; - py_params["degree"] = params.degree; - if (std::holds_alternative(params.gamma)) { - py_params["gamma"] = std::get(params.gamma); - } else { - switch (std::get(params.gamma)) { - case plssvm::gamma_coefficient_type::automatic: - py_params["gamma"] = "auto"; - break; - case plssvm::gamma_coefficient_type::scale: - py_params["gamma"] = "scale"; - break; - } - } - py_params["kernel"] = fmt::format("{}", params.kernel_type); - py_params["max_iter"] = self.max_iter.has_value() ? static_cast(self.max_iter.value()) : -1; - py_params["probability"] = false; - py_params["random_state"] = py::none(); - py_params["shrinking"] = false; - py_params["tol"] = self.epsilon.value_or(typename svc::real_type{ 1e-3 }); - py_params["verbose"] = plssvm::verbosity != plssvm::verbosity_level::quiet; - - return py_params; - }, - "Get parameters for this estimator.", - py::arg("depp") = true) - .def("predict", [](svc &self, py::array_t data) { - if (self.model_ == nullptr) { - throw py::attribute_error{ "This SVC instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator." 
}; - } else { - const typename svc::data_set_type data_to_predict{ pyarray_to_matrix(data) }; - return vector_to_pyarray(self.svm_->predict(*self.model_, data_to_predict)); - } }, "Perform classification on samples in X.") - .def("predict_log_proba", [](const svc &, py::array_t) { throw py::attribute_error{ "'SVC' object has no function 'predict_log_proba' (not implemented)" }; }) - .def("predict_proba", [](const svc &, py::array_t) { throw py::attribute_error{ "'SVC' object has no function 'predict_proba' (not implemented)" }; }); -#if !defined(PLSSVM_PYTHON_BINDINGS_LABEL_TYPE_IS_STRING) - py_svc.def( - "score", [](svc &self, py::array_t data, py::array_t labels, std::optional> sample_weight) { - if (sample_weight.has_value()) { - throw py::attribute_error{ "The 'sample_weight' parameter for a call to 'fit' is not implemented yet!" }; - } - - if (self.model_ == nullptr) { - throw py::attribute_error{ "This SVC instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator." }; - } else { - const typename svc::data_set_type data_to_score{ pyarray_to_matrix(data), pyarray_to_vector(labels) }; - return self.svm_->score(*self.model_, data_to_score); - } - }, - "Return the mean accuracy on the given test data and labels.", - py::arg("X"), - py::arg("y"), - py::pos_only(), - py::arg("sample_weight") = std::nullopt); -#else - py_svc.def( - "score", [](svc &self, py::array_t data, py::array_t labels, const std::optional> &sample_weight) { - if (sample_weight.has_value()) { - throw py::attribute_error{ "The 'sample_weight' parameter for a call to 'fit' is not implemented yet!" }; - } - - if (self.model_ == nullptr) { - throw py::attribute_error{ "This SVC instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator." 
}; - } else { - const typename svc::data_set_type data_to_score{ pyarray_to_matrix(data), pyarray_to_string_vector(labels) }; - return self.svm_->score(*self.model_, data_to_score); - } - }, - "Return the mean accuracy on the given test data and labels.", - py::arg("X"), - py::arg("y"), - py::pos_only(), - py::arg("sample_weight") = std::nullopt) - .def("score", [](svc &self, py::array_t data, py::list labels, const std::optional> &sample_weight) { - if (sample_weight.has_value()) { - throw py::attribute_error{ "The 'sample_weight' parameter for a call to 'fit' is not implemented yet!" }; - } - - if (self.model_ == nullptr) { - throw py::attribute_error{ "This SVC instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator." }; - } else { - const typename svc::data_set_type data_to_score{ pyarray_to_matrix(data), pylist_to_string_vector(labels) }; - return self.svm_->score(*self.model_, data_to_score); - } }, "Return the mean accuracy on the given test data and labels.", py::arg("X"), py::arg("y"), py::pos_only(), py::arg("sample_weight") = std::nullopt); -#endif - py_svc.def( - "set_params", [](svc &self, const py::kwargs &args) -> svc & { - parse_provided_params(self, args); - return self; - }, - "Set the parameters of this estimator.", - py::return_value_policy::reference); -} diff --git a/bindings/Python/sklearn_svc.cpp b/bindings/Python/sklearn_svc.cpp new file mode 100644 index 000000000..63c4238ea --- /dev/null +++ b/bindings/Python/sklearn_svc.cpp @@ -0,0 +1,709 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ */ + +#include "plssvm/classification_types.hpp" // plssvm::classification_type +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/csvm_factory.hpp" // plssvm::make_csvc +#include "plssvm/data_set/classification_data_set.hpp" // plssvm::classification_data_set +#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t +#include "plssvm/gamma.hpp" // plssvm::gamma_coefficient_type, plssvm::gamma_type +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/matrix.hpp" // plssvm::aos_matrix, plssvm::soa_matrix +#include "plssvm/model/classification_model.hpp" // plssvm::classification_model +#include "plssvm/parameter.hpp" // plssvm::parameter, named arguments definition +#include "plssvm/svm/csvc.hpp" // plssvm::csvc +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level, plssvm::verbosity + +#include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::classification_data_set_wrapper +#include "bindings/Python/model/variant_wrapper.hpp" // plssvm::bindings::python::util::classification_model_wrapper +#include "bindings/Python/type_caster/label_vector_wrapper_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::label_vector_wrapper +#include "bindings/Python/type_caster/matrix_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::matrix +#include "bindings/Python/type_caster/matrix_wrapper_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::matrix_wrapper +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_gamma_kwarg_to_variant, vector_to_pyarray} + +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join +#include "pybind11/numpy.h" // support for STL types +#include "pybind11/operators.h" // support for operators +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, 
py::return_value_policy, py::self, py::dynamic_attr, py::value_error, py::attribute_error +#include "pybind11/stl.h" // support for STL types + +#include // std::fill +#include // std::size_t +#include // std::int32_t +#include // fixed-width integers +#include // std::exception +#include // std::map +#include // std::unique_ptr, std::make_unique +#include // std::optional, std::nullopt +#include // std::string +#include // std::make_tuple, std::ignore +#include // std::move +#include // std::holds_alternative, std::variant, std::visit +#include // std::vector + +namespace py = pybind11; + +// TODO: implement missing functionality (as far es possible) + +// dummy +struct svc { + using possible_vector_types = typename plssvm::bindings::python::util::classification_data_set_wrapper::possible_vector_types; + using possible_data_set_types = typename plssvm::bindings::python::util::classification_data_set_wrapper::possible_data_set_types; + using possible_model_types = typename plssvm::bindings::python::util::classification_model_wrapper::possible_model_types; + + /** + * @brief Wrapper function to call the private (friendship) predict_values function. + * @tparam Args the types of the parameter used for calling the predict_values function + * @param[in] args the predict_values function parameter + * @return the predicted values (`[[nodiscard]]`) + */ + template + auto call_predict_values(Args &&...args) const { + return svm_->predict_values(std::forward(args)...); + } + + /** + * @brief Get the index sets used for the decision_function function in the one-vs-one classification case from the currently learned model. + * @return the index sets (`[[nodiscard]]`) + */ + [[nodiscard]] const auto &get_index_sets_ptr() const { + if (model_ == nullptr) { + throw py::attribute_error{ "This SVC instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator." 
}; + } + + // clang-format off + return std::visit([](auto &&model) -> const auto & { + return *model.index_sets_ptr_; + }, *model_); + // clang-format on + } + + /** + * @brief Get the w values used for the coef_ attribute from the currently learned linear model. + * @return the w values (`[[nodiscard]]`) + */ + [[nodiscard]] const auto &get_w_ptr() const { + if (model_ == nullptr) { + throw py::attribute_error{ "This SVC instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator." }; + } + + // clang-format off + return std::visit([](auto &&model) -> const auto & { + return *model.w_ptr_; + }, *model_); + // clang-format on + } + + /** + * @brief Return the currently used params. + * @details Necessary for the same Python function and also the string representation. + * @return a Python dictionary containing the used parameter (`[[nodiscard]]`) + */ + [[nodiscard]] py::dict get_params(const bool) const { + const plssvm::parameter params = svm_->get_params(); + + // fill a Python dictionary with the supported keys and values + py::dict py_params; + py_params["C"] = params.cost; + py_params["break_ties"] = false; + py_params["cache_size"] = 0; + py_params["class_weight"] = py::none(); + py_params["coef0"] = params.coef0; + py_params["decision_function_shape"] = classification_ == plssvm::classification_type::oaa ? "ovr" : "ovo"; + py_params["degree"] = params.degree; + if (std::holds_alternative(params.gamma)) { + py_params["gamma"] = std::get(params.gamma); + } else { + switch (std::get(params.gamma)) { + case plssvm::gamma_coefficient_type::automatic: + py_params["gamma"] = "auto"; + break; + case plssvm::gamma_coefficient_type::scale: + py_params["gamma"] = "scale"; + break; + } + } + py_params["kernel"] = fmt::format("{}", params.kernel_type); + py_params["max_iter"] = max_iter_.has_value() ? 
static_cast(max_iter_.value()) : -1; + py_params["probability"] = false; + py_params["random_state"] = py::none(); + py_params["shrinking"] = false; + py_params["tol"] = epsilon_.value_or(plssvm::real_type{ 1e-10 }); + py_params["verbose"] = plssvm::verbosity != plssvm::verbosity_level::quiet; + + return py_params; + } + + py::dtype py_dtype_{}; + std::optional epsilon_{}; + std::optional max_iter_{}; + plssvm::classification_type classification_{ plssvm::classification_type::oaa }; + + std::unique_ptr svm_ = plssvm::make_csvc(plssvm::gamma = plssvm::gamma_coefficient_type::scale); + std::unique_ptr data_{}; + std::unique_ptr model_{}; + + std::optional> feature_names_{}; +}; + +namespace { + +void parse_provided_kwargs(svc &self, const py::kwargs &args) { + // check keyword arguments + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "C", "kernel", "degree", "gamma", "coef0", "shrinking", "probability", "tol", "cache_size", "class_weight", "verbose", "max_iter", "decision_function_shape", "break_ties", "random_state" }); + + if (args.contains("C")) { + self.svm_->set_params(plssvm::cost = args["C"].cast()); + } + if (args.contains("kernel")) { + const auto kernel_str = args["kernel"].cast(); + plssvm::kernel_function_type kernel{}; + if (kernel_str == "linear") { + kernel = plssvm::kernel_function_type::linear; + } else if (kernel_str == "poly" || kernel_str == "polynomial") { + kernel = plssvm::kernel_function_type::polynomial; + } else if (kernel_str == "rbf") { + kernel = plssvm::kernel_function_type::rbf; + } else if (kernel_str == "sigmoid") { + kernel = plssvm::kernel_function_type::sigmoid; + } else if (kernel_str == "laplacian") { + kernel = plssvm::kernel_function_type::laplacian; + } else if (kernel_str == "chi_squared" || kernel_str == "chi-squared") { + kernel = plssvm::kernel_function_type::chi_squared; + } else if (kernel_str == "precomputed") { + throw py::value_error{ R"(The "kernel = 'precomputed'" parameter for the 'SVC' is 
not implemented yet!)" }; + } else { + throw py::value_error{ fmt::format("'{}' is not in list", kernel_str) }; + } + self.svm_->set_params(plssvm::kernel_type = kernel); + } + if (args.contains("degree")) { + self.svm_->set_params(plssvm::degree = args["degree"].cast()); + } + if (args.contains("gamma")) { + const plssvm::gamma_type gamma = plssvm::bindings::python::util::convert_gamma_kwarg_to_variant(args); + if (std::holds_alternative(gamma)) { + self.svm_->set_params(plssvm::gamma = std::get(gamma)); + } else { + self.svm_->set_params(plssvm::gamma = std::get(gamma)); + } + } + if (args.contains("coef0")) { + self.svm_->set_params(plssvm::coef0 = args["coef0"].cast()); + } + if (args.contains("shrinking")) { + throw py::value_error{ "The 'shrinking' parameter for the 'SVC' is not implemented and makes no sense for a LS-SVM!" }; + } + if (args.contains("probability")) { + throw py::value_error{ "The 'probability' parameter for the 'SVC' is not implemented yet!" }; + } + if (args.contains("tol")) { + self.epsilon_ = args["tol"].cast(); + } + if (args.contains("cache_size")) { + throw py::value_error{ "The 'cache_size' parameter for the 'SVC' is not implemented and makes no sense for our PLSSVM implementation!" }; + } + if (args.contains("class_weight")) { + throw py::value_error{ "The 'class_weight' parameter for the 'SVC' is not implemented yet!" 
}; + } + if (args.contains("verbose")) { + if (args["verbose"].cast()) { + if (plssvm::verbosity == plssvm::verbosity_level::quiet) { + // if current verbosity is quiet, override with full verbosity, since 'verbose=TRUE' should never result in no output + plssvm::verbosity = plssvm::verbosity_level::full; + } + // otherwise: use currently active verbosity level + } else { + plssvm::verbosity = plssvm::verbosity_level::quiet; + } + } + if (args.contains("max_iter")) { + const auto max_iter = args["max_iter"].cast(); + if (max_iter > 0) { + // use provided value + self.max_iter_ = static_cast(max_iter); + } else if (max_iter == -1) { + // default behavior in PLSSVM -> do nothing + } else { + // invalid max_iter provided + throw py::value_error{ fmt::format("max_iter must either be greater than zero or -1, got {}!", max_iter) }; + } + } + if (args.contains("decision_function_shape")) { + const std::string &dfs = args["decision_function_shape"].cast(); + if (dfs == "ovo") { + self.classification_ = plssvm::classification_type::oao; + } else if (dfs == "ovr") { + self.classification_ = plssvm::classification_type::oaa; + } else { + throw py::value_error{ fmt::format("decision_function_shape must be either 'ovr' or 'ovo', got {}.", dfs) }; + } + } + if (args.contains("break_ties")) { + throw py::value_error{ "The 'break_ties' parameter for the 'SVC' is not implemented yet!" }; + } + if (args.contains("random_state")) { + throw py::value_error{ "The 'random_state' parameter for the 'SVC' is not implemented yet!" 
}; + } +} + +void fit(svc &self) { + // perform sanity checks + if (self.svm_->get_params().cost <= plssvm::real_type{ 0.0 }) { + throw py::value_error{ "C <= 0" }; + } + if (self.svm_->get_params().degree < 0) { + throw py::value_error{ "degree of polynomial kernel < 0" }; + } + if (self.epsilon_.has_value() && self.epsilon_.value() <= plssvm::real_type{ 0.0 }) { + throw py::value_error{ "eps <= 0" }; + } + + // fit the model using potentially provided keyword arguments + std::visit([&](auto &&data) { + using possible_model_types = typename svc::possible_model_types; + + if (self.epsilon_.has_value() && self.max_iter_.has_value()) { + self.model_ = std::make_unique(self.svm_->fit(data, + plssvm::classification = self.classification_, + plssvm::epsilon = self.epsilon_.value(), + plssvm::max_iter = self.max_iter_.value())); + } else if (self.epsilon_.has_value()) { + self.model_ = std::make_unique(self.svm_->fit(data, + plssvm::classification = self.classification_, + plssvm::epsilon = self.epsilon_.value())); + } else if (self.max_iter_.has_value()) { + self.model_ = std::make_unique(self.svm_->fit(data, + plssvm::classification = self.classification_, + plssvm::max_iter = self.max_iter_.value())); + } else { + self.model_ = std::make_unique(self.svm_->fit(data, + plssvm::classification = self.classification_)); + } + }, + *self.data_); +} + +template +[[nodiscard]] std::vector calculate_sv_indices_per_class(const svc &self) { + return std::visit([&](auto &&model) { + using label_type = typename plssvm::detail::remove_cvref_t::label_type; + + std::map> indices_per_class{}; + // init index-map map + for (const label_type &label : model.classes()) { + indices_per_class.insert({ label, std::vector{} }); + } + // sort the indices into the respective bucket based on their associated class + for (std::size_t idx = 0; idx < model.num_support_vectors(); ++idx) { + indices_per_class[model.labels()->get()[idx]].push_back(static_cast(idx)); + } + // convert map values to 
vector + std::vector support{}; + support.reserve(model.num_support_vectors()); + for (const auto &[label, indices] : indices_per_class) { + support.insert(support.cend(), indices.cbegin(), indices.cend()); + } + return support; + }, + *self.model_); +} + +} // namespace + +void init_sklearn_svc(py::module_ &m) { + // documentation based on sklearn.svm.SVC documentation + py::class_ py_svc(m, "SVC", py::dynamic_attr(), "A C-SVC implementation adhering to sklearn.svm.SVC using PLSSVM as backend."); + py_svc.def(py::init([](const py::kwargs &args) { + // to silence constructor messages + if (args.contains("verbose")) { + if (args["verbose"].cast()) { + if (plssvm::verbosity == plssvm::verbosity_level::quiet) { + // if current verbosity is quiet, override with full verbosity, since 'verbose=TRUE' should never result in no output + plssvm::verbosity = plssvm::verbosity_level::full; + } + // otherwise: use currently active verbosity level + } else { + plssvm::verbosity = plssvm::verbosity_level::quiet; + } + } else { + // sklearn default is quiet + plssvm::verbosity = plssvm::verbosity_level::quiet; + } + + // create SVC class + auto self = std::make_unique(); + parse_provided_kwargs(*self, args); + return self; + }), + "Construct a new SVC classifier."); + + //*************************************************************************************************************************************// + // ATTRIBUTES // + //*************************************************************************************************************************************// + py_svc + .def_property_readonly("class_weight_", [](const svc &self) -> py::array { + if (self.model_ == nullptr) { + throw py::attribute_error{ "'SVC' object has no attribute 'class_weight_'" }; + } + + // note: constant one since the class_weight parameter is currently not supported + const auto size = static_cast(std::visit([](auto &&model) { return model.num_classes(); }, *self.model_)); + py::array_t py_array(size);
+ const py::buffer_info buffer = py_array.request(); + auto ptr = static_cast(buffer.ptr); + std::fill(ptr, ptr + size, plssvm::real_type{ 1.0 }); + return py_array; }, "Multipliers of parameter C for each class. ndarray of shape (n_classes,)") + .def_property_readonly("classes_", [](const svc &self) -> py::array { + if (self.model_ == nullptr) { + throw py::attribute_error{ "'SVC' object has no attribute 'classes_'" }; + } + + return std::visit([](auto &&data) -> py::array { + return plssvm::bindings::python::util::vector_to_pyarray(data.classes().value()); + }, *self.data_); }, "The classes labels. ndarray of shape (n_classes,)") + .def_property_readonly("coef_", [](const svc &self) -> py::array { + if (self.model_ == nullptr) { + throw py::attribute_error{ "'SVC' object has no attribute 'coef_'" }; + } + if (self.svm_->get_params().kernel_type != plssvm::kernel_function_type::linear) { + throw py::attribute_error{ "coef_ is only available when using a linear kernel" }; + } + + return std::visit([&](auto &&model) { + // check if the w ptr has already been set + if (self.get_w_ptr().empty()) { + // score + std::ignore = self.svm_->score(model); + } + + // now, the w ptr is set and can be used + return py::cast(self.get_w_ptr()); + }, *self.model_); }, "Weights assigned to the features when kernel=\"linear\". ovo: ndarray of shape (n_classes * (n_classes - 1) / 2, n_features). ovr: (n_classes, n_features)") + .def_property_readonly("dual_coef_", [](const svc &) { throw py::attribute_error{ "'SVC' object has no attribute 'dual_coef_' (not implemented)" }; }, "Dual coefficients of the support vector in the decision function, multiplied by their targets. ndarray of shape (n_classes - 1, n_SV)") + .def_property_readonly("fit_status_", [](const svc &self) -> int { + if (self.model_ == nullptr) { + throw py::attribute_error{ "'SVC' object has no attribute 'fit_status_'" }; + } + + return 0; }, "0 if correctly fitted, 1 otherwise (will raise exception). 
int") + .def_property_readonly("intercept_", [](const svc &self) -> py::array { + if (self.model_ == nullptr) { + throw py::attribute_error{ "'SVC' object has no attribute 'intercept_'" }; + } + + return std::visit([&](auto &&model) { + std::vector rho = model.rho(); + + // ovr binary special case + if (self.classification_ == plssvm::classification_type::oaa && model.num_classes() == 2) { + rho.pop_back(); + } + + return plssvm::bindings::python::util::vector_to_pyarray(rho); + }, *self.model_); }, "Constants in decision function. ovo: ndarray of shape (n_classes * (n_classes - 1) / 2,). ovr: ndarray of shape (n_classes,)") + .def_property_readonly("n_features_in_", [](const svc &self) -> int { + if (self.model_ == nullptr) { + throw py::attribute_error{ "'SVC' object has no attribute 'n_features_in_'" }; + } + + return static_cast(std::visit([](auto &&data) { return data.num_features(); }, *self.data_)); }, "Number of features seen during fit. int") + .def_property_readonly("feature_names_in_", [](const svc &self) { + if (!self.feature_names_.has_value()) { + throw py::attribute_error{ "'SVC' object has no attribute 'feature_names_in_'" }; + } + + return plssvm::bindings::python::util::vector_to_pyarray(self.feature_names_.value()); }, "Names of features seen during fit. ndarray of shape (n_features_in_,)") + .def_property_readonly("n_iter_", [](const svc &self) -> py::array { + if (self.model_ == nullptr) { + throw py::attribute_error{ "'SVC' object has no attribute 'n_iter_'" }; + } + + return std::visit([](auto &&model) { return plssvm::bindings::python::util::vector_to_pyarray(model.num_iters().value()); }, *self.model_); }, "Number of iterations run by the optimization routine to fit the model.
ndarray of shape (n_classes * (n_classes - 1) // 2,)") + .def_property_readonly("support_", [](const svc &self) -> py::array { + if (self.model_ == nullptr) { + throw py::attribute_error{ "'SVC' object has no attribute 'support_'" }; + } + + return plssvm::bindings::python::util::vector_to_pyarray(calculate_sv_indices_per_class(self)); }, "Indices of support vectors. ndarray of shape (n_SV)") + .def_property_readonly("support_vectors_", [](const svc &self) -> py::array { + if (self.model_ == nullptr) { + throw py::attribute_error{ "'SVC' object has no attribute 'support_vectors_'" }; + } + + // get the sorted indices + const std::vector support = calculate_sv_indices_per_class(self); + // convert support vectors matrix to 2d vector + std::vector> sv = std::visit([](auto &&model) { return model.support_vectors().to_2D_vector(); }, *self.model_); + + // sort support vectors by their class + std::vector> sorted_sv{}; + sorted_sv.reserve(sv.size()); + for (const int idx : support) { + sorted_sv.push_back(std::move(sv[idx])); + } + + // convert 2D vector back to plssvm::matrix + return py::cast(plssvm::aos_matrix{ std::move(sorted_sv) }); }, "Support vectors. 
ndarray of shape (n_SV, n_features)") + .def_property_readonly("n_support_", [](const svc &self) -> py::array { + if (self.model_ == nullptr) { + throw py::attribute_error{ "'SVC' object has no attribute 'n_support_'" }; + } + + return std::visit([&](auto &&model) { + using label_type = typename plssvm::detail::remove_cvref_t::label_type; + + std::map occurrences{}; + // init count map + for (const label_type &label : model.classes()) { + occurrences.insert({ label, std::int32_t{ 0 } }); + } + // count occurrences + for (const label_type &label : model.labels()->get()) { + ++occurrences[label]; + } + // convert map values to vector + std::vector n_support{}; + n_support.reserve(occurrences.size()); + for (const auto &[label, n_sv] : occurrences) { + n_support.push_back(n_sv); + } + // convert to Numpy array + return plssvm::bindings::python::util::vector_to_pyarray(n_support); + }, *self.model_); }, "Number of support vectors for each class. ndarray of shape (n_classes,), dtype=int32") + .def_property_readonly("probA_", [](const svc &) { throw py::attribute_error{ "'SVC' object has no attribute 'probA_' (not implemented)" }; }, "Parameter learned in Platt scaling when probability=True. ndarray of shape (n_classes * (n_classes - 1) / 2)") + .def_property_readonly("probB_", [](const svc &) { throw py::attribute_error{ "'SVC' object has no attribute 'probB_' (not implemented)" }; }, "Parameter learned in Platt scaling when probability=True. ndarray of shape (n_classes * (n_classes - 1) / 2)") + .def_property_readonly("shape_fit_", [](const svc &self) { + if (self.model_ == nullptr) { + throw py::attribute_error{ "'SVC' object has no attribute 'shape_fit_'" }; + } + + return std::visit([](auto &&data) { return std::make_tuple(static_cast(data.num_data_points()), static_cast(data.num_features())); }, *self.data_); }, "Array dimensions of training vector X. 
tuple of int of shape (n_dimensions_of_X,)") + .def_property_readonly("_estimator_type", [](const svc &) { return "classifier"; }, "The type of estimator. Always 'classifier' for SVC."); + + //*************************************************************************************************************************************// + // METHODS // + //*************************************************************************************************************************************// + py_svc + .def("decision_function", [](const svc &self, plssvm::soa_matrix predict_points) -> py::array { + if (self.model_ == nullptr) { + throw py::attribute_error{ "This SVC instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator." }; + } + + return std::visit([&](auto &&model) -> py::array { + switch (self.classification_) { + case plssvm::classification_type::oaa: + { + const plssvm::parameter &params = model.get_params(); + const plssvm::soa_matrix &sv = model.support_vectors(); + const plssvm::aos_matrix &alpha = model.weights().front(); // num_classes x num_data_points + const std::vector &rho = model.rho(); + plssvm::soa_matrix w{}; // empty -> no need to befriend the model class!
+ + // predict values using OAA -> num_data_points x num_classes + const plssvm::aos_matrix votes = self.call_predict_values(params, sv, alpha, rho, w, predict_points); + + // special case for binary classification + if (model.num_classes() == 2) { + std::vector reduced_votes(votes.num_rows()); + for (std::size_t i = 0; i < votes.num_rows(); ++i) { + reduced_votes[i] = -votes(i, 0); + } + return plssvm::bindings::python::util::vector_to_pyarray(reduced_votes); + } else { + return py::cast(votes); + } + } + case plssvm::classification_type::oao: + { + const std::size_t num_features = model.num_features(); + const std::size_t num_classes = model.num_classes(); + const std::vector> &index_sets = self.get_index_sets_ptr(); + + const plssvm::parameter &params = model.get_params(); + const std::vector> &alpha = model.weights(); + const std::vector &rho = model.rho(); + + // create the numpy array + py::array_t votes{ { predict_points.num_rows(), plssvm::calculate_number_of_classifiers(plssvm::classification_type::oao, num_classes) } }; + auto votes_access = votes.mutable_unchecked<2>(); + + // perform one vs. one prediction + std::size_t pos = 0; + for (std::size_t i = 0; i < num_classes; ++i) { + for (std::size_t j = i + 1; j < num_classes; ++j) { + // assemble one vs. one classification matrix and rhs + const std::size_t num_data_points_in_sub_matrix{ index_sets[i].size() + index_sets[j].size() }; + const plssvm::aos_matrix &binary_alpha = alpha[pos]; + const std::vector binary_rho{ rho[pos] }; + + // create binary support vector matrix, based on the number of classes + const plssvm::soa_matrix &binary_sv = [&]() { + if (num_classes == 2) { + // no special assembly needed in binary case + return model.support_vectors(); + } else { + // note: if this is changed, it must also be changed in the libsvm_model_parsing.hpp in the calculate_alpha_idx function!!!
+ // order the indices in increasing order + plssvm::soa_matrix temp{ plssvm::shape{ num_data_points_in_sub_matrix, num_features }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } }; + std::vector sorted_indices(num_data_points_in_sub_matrix); + std::merge(index_sets[i].cbegin(), index_sets[i].cend(), index_sets[j].cbegin(), index_sets[j].cend(), sorted_indices.begin()); +// copy the support vectors to the binary support vectors +// NOTE: it seems that MSVC doesn't like the collapse clause inside a lambda function +#if defined(_MSC_VER) + #pragma omp parallel for +#else + #pragma omp parallel for collapse(2) +#endif + for (std::size_t si = 0; si < num_data_points_in_sub_matrix; ++si) { + for (std::size_t dim = 0; dim < num_features; ++dim) { + temp(si, dim) = model.support_vectors()(sorted_indices[si], dim); + } + } + return temp; + } + }(); + + // we don't use the w optimization for the linear kernel here due to code simplicity + plssvm::soa_matrix w{}; + // predict the values + const plssvm::aos_matrix binary_votes = self.call_predict_values(params, binary_sv, binary_alpha, binary_rho, w, predict_points); + + // update final votes + for (std::size_t pp = 0; pp < predict_points.num_rows(); ++pp) { + votes_access(pp, pos) = binary_votes(pp, 0); + } + + // go to next one vs. 
one classification + ++pos; + // order of the alpha value: 0 vs 1, 0 vs 2, 0 vs 3, 1 vs 2, 1 vs 3, 2 vs 3 + } + } + + // special case binary classification: only one classifier exists, so its values are in column 0 (pos has already been advanced to 1 by the loop above) + if (num_classes == 2) { + for (std::size_t pp = 0; pp < predict_points.num_rows(); ++pp) { + votes_access(pp, 0) *= plssvm::real_type{ -1.0 }; + } + return votes.reshape(py::array::ShapeContainer{ votes.size() }); + } else { + return votes; + } + } + } + // unreachable + return py::array{}; + }, *self.model_); }, "Evaluate the decision function for the samples in X.") + .def("fit", [](svc &self, plssvm::bindings::python::util::soa_matrix_wrapper data, plssvm::bindings::python::util::label_vector_wrapper labels, const std::optional> &sample_weight) -> svc & { + // sanity check parameter + if (sample_weight.has_value()) { + throw py::attribute_error{ "The 'sample_weight' parameter for a call to 'fit' is not implemented yet!" }; + } + + // store the used label type + self.py_dtype_ = labels.dtype; + + // retrieve the potential feature names + self.feature_names_ = std::move(data.feature_names); + + // create the data set to fit + std::visit([&](auto &&labels_vector) { + // get the label type and possible data set types + using label_type = typename plssvm::detail::remove_cvref_t::value_type; + using possible_data_set_types = typename svc::possible_data_set_types; + // create the data set to fit + self.data_ = std::make_unique(plssvm::classification_data_set(std::move(data.matrix), std::move(labels_vector))); + }, + labels.labels); + + // fit the model + fit(self); + return self; }, "Fit the SVM model according to the given training data.", py::arg("X"), py::arg("y"), py::pos_only(), py::arg("sample_weight") = std::nullopt, py::return_value_policy::reference) + .def("get_metadata_routing", [](const svc &) { throw py::attribute_error{ "'SVC' object has no function 'get_metadata_routing' (not implemented)" }; }, "Get metadata routing of this object.") + .def("get_params", &svc::get_params, "Get parameters for this
estimator.", py::arg("deep") = true) + .def("predict", [](svc &self, plssvm::soa_matrix data) -> py::array { + if (self.model_ == nullptr) { + throw py::attribute_error{ "This SVC instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator." }; + } + + return std::visit([&](auto &&model) { + // get the label type + using label_type = typename plssvm::detail::remove_cvref_t::label_type; + // create the data set to predict + const plssvm::classification_data_set data_to_predict{ std::move(data) }; + // predict the data + return plssvm::bindings::python::util::vector_to_pyarray(self.svm_->predict(model, data_to_predict)); + }, *self.model_); }, "Perform classification on samples in X.") + .def("predict_log_proba", [](const svc &, py::array_t) { throw py::attribute_error{ "'SVC' object has no function 'predict_log_proba' (not implemented)" }; }, "Compute log probabilities of possible outcomes for samples in X.") + .def("predict_proba", [](const svc &, py::array_t) { throw py::attribute_error{ "'SVC' object has no function 'predict_proba' (not implemented)" }; }, "Compute probabilities of possible outcomes for samples in X.") + .def("score", [](svc &self, plssvm::soa_matrix data, plssvm::bindings::python::util::label_vector_wrapper labels, const std::optional> &sample_weight) -> plssvm::real_type { + // sanity check parameter + if (sample_weight.has_value()) { + throw py::attribute_error{ "The 'sample_weight' parameter for a call to 'score' is not implemented yet!" }; + } + if (self.model_ == nullptr) { + throw py::attribute_error{ "This SVC instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator."
}; + } + + // score the data + return std::visit([&](auto &&labels_vector) { + // get the label types + using label_type = typename plssvm::detail::remove_cvref_t::value_type; + // create the data set to score + const plssvm::classification_data_set data_to_score{ std::move(data), std::move(labels_vector) }; + // score the data + try { + return self.svm_->score(std::get>(*self.model_), data_to_score); + } catch (const std::exception &) { + throw py::value_error{ fmt::format(R"(The dtype of the labels to score is "{}", but the model was fitted with "{}". Please use the same types for fit and score!)", labels.dtype.attr("name").cast(), self.py_dtype_.attr("name").cast()) }; + } + }, labels.labels); }, "Return the mean accuracy on the given test data and labels.", py::arg("X"), py::arg("y"), py::pos_only(), py::arg("sample_weight") = std::nullopt) + .def("set_fit_request", [](const svc &) { throw py::attribute_error{ "'SVC' object has no function 'set_fit_request' (not implemented)" }; }, "Request metadata passed to the fit method.") + .def("set_params", [](svc &self, const py::kwargs &args) -> svc & { + parse_provided_kwargs(self, args); + return self; }, "Set the parameters of this estimator.", py::return_value_policy::reference) + .def("set_score_request", [](const svc &) { throw py::attribute_error{ "'SVC' object has no function 'set_score_request' (not implemented)" }; }, "Request metadata passed to the score method.") + .def("__sklearn_is_fitted__", [](const svc &self) -> bool { return self.model_ != nullptr; }, "Return True if the estimator is fitted, False otherwise.") + .def("__sklearn_clone__", [](const svc &self) -> svc { + // create a new SVC instance + svc new_svc{}; + // copy the parameters + new_svc.svm_->set_params(self.svm_->get_params()); + new_svc.py_dtype_ = self.py_dtype_; + new_svc.epsilon_ = self.epsilon_; + new_svc.max_iter_ = self.max_iter_; + new_svc.classification_ = self.classification_; + return new_svc; }, "Clone the estimator.") + 
.def("__repr__", [](const svc &self) { + // get the currently used parameters + py::dict used_params = self.get_params(true); + py::dict default_params = svc{}.get_params(true); + + std::vector non_default_values{}; + + // iterate over all available keys and check if the currently used one differs from the default one + for (auto item : used_params) { + const auto key = item.first.cast(); + + // get the values as string + const std::string used_param_str = py::str(used_params[key.c_str()]); + const std::string default_param_str = py::str(default_params[key.c_str()]); + + // check if the parameter values are identical, if not, add them to the vector + if (used_param_str != default_param_str) { + if (py::isinstance(used_params[key.c_str()])) { + non_default_values.push_back(fmt::format("{}='{}'", key, used_param_str)); + } else { + non_default_values.push_back(fmt::format("{}={}", key, used_param_str)); + } + } + } + + return fmt::format("plssvm.SVC({})", fmt::join(non_default_values, ", ")); }); +} diff --git a/bindings/Python/sklearn_svr.cpp b/bindings/Python/sklearn_svr.cpp new file mode 100644 index 000000000..9256b49a0 --- /dev/null +++ b/bindings/Python/sklearn_svr.cpp @@ -0,0 +1,434 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ */ + +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/csvm_factory.hpp" // plssvm::make_csvr +#include "plssvm/data_set/regression_data_set.hpp" // plssvm::regression_data_set +#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t +#include "plssvm/gamma.hpp" // plssvm::gamma_coefficient_type, plssvm::gamma_type +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/matrix.hpp" // plssvm::aos_matrix, plssvm::soa_matrix +#include "plssvm/model/regression_model.hpp" // plssvm::regression_model +#include "plssvm/parameter.hpp" // plssvm::parameter, named arguments definition +#include "plssvm/svm/csvr.hpp" // plssvm::csvr +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level, plssvm::verbosity + +#include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_data_set_wrapper +#include "bindings/Python/model/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_model_wrapper +#include "bindings/Python/type_caster/label_vector_wrapper_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::label_vector_wrapper +#include "bindings/Python/type_caster/matrix_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::matrix +#include "bindings/Python/type_caster/matrix_wrapper_type_caster.hpp" // a custom Pybind11 type caster for a plssvm::bindings::python::util::matrix_wrapper +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_gamma_kwarg_to_variant, vector_to_pyarray} + +#include "fmt/format.h" // fmt::format +#include "pybind11/numpy.h" // support for STL types +#include "pybind11/operators.h" // support for operators +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::arg, py::return_value_policy, py::self, py::dynamic_attr, py::value_error, py::attribute_error +#include "pybind11/stl.h" // support for STL 
types + +#include // std::int32_t +#include // std::unique_ptr, std::make_unique +#include // std::iota +#include // std::optional, std::nullopt +#include // std::string +#include // std::make_tuple +#include // std::move +#include // std::holds_alternative +#include // std::vector + +namespace py = pybind11; + +// TODO: implement missing functionality (as far as possible) + +// dummy +struct svr { + using possible_vector_types = typename plssvm::bindings::python::util::regression_data_set_wrapper::possible_vector_types; + using possible_data_set_types = typename plssvm::bindings::python::util::regression_data_set_wrapper::possible_data_set_types; + using possible_model_types = typename plssvm::bindings::python::util::regression_model_wrapper::possible_model_types; + + /** + * @brief Get the w values used for the coef_ attribute from the currently learned linear model. + * @return the w values (`[[nodiscard]]`) + */ + [[nodiscard]] const auto &get_w_ptr() const { + if (model_ == nullptr) { + throw py::attribute_error{ "This SVR instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator." }; + } + return std::visit([](auto &&model) -> const auto & { return *model.w_ptr_; }, *model_); + } + + /** + * @brief Return the currently used params. + * @details Necessary for the same Python function and also the string representation.
+ * @return a Python dictionary containing the used parameter (`[[nodiscard]]`) + */ + [[nodiscard]] py::dict get_params(const bool) const { + const plssvm::parameter params = svm_->get_params(); + + // fill a Python dictionary with the supported keys and values + py::dict py_params; + py_params["C"] = params.cost; + py_params["cache_size"] = 0; + py_params["coef0"] = params.coef0; + py_params["degree"] = params.degree; + if (std::holds_alternative(params.gamma)) { + py_params["gamma"] = std::get(params.gamma); + } else { + switch (std::get(params.gamma)) { + case plssvm::gamma_coefficient_type::automatic: + py_params["gamma"] = "auto"; + break; + case plssvm::gamma_coefficient_type::scale: + py_params["gamma"] = "scale"; + break; + } + } + py_params["kernel"] = fmt::format("{}", params.kernel_type); + py_params["max_iter"] = max_iter_.has_value() ? static_cast(max_iter_.value()) : -1; + py_params["shrinking"] = false; + py_params["tol"] = epsilon_.value_or(plssvm::real_type{ 1e-10 }); + py_params["verbose"] = plssvm::verbosity != plssvm::verbosity_level::quiet; + + return py_params; + } + + py::dtype py_dtype_{}; + std::optional epsilon_{}; + std::optional max_iter_{}; + + std::unique_ptr svm_ = plssvm::make_csvr(plssvm::gamma = plssvm::gamma_coefficient_type::scale); + std::unique_ptr data_{}; + std::unique_ptr model_{}; + + std::optional> feature_names_{}; +}; + +namespace { + +void parse_provided_kwargs(svr &self, const py::kwargs &args) { + // check keyword arguments + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "C", "kernel", "degree", "gamma", "coef0", "shrinking", "tol", "cache_size", "verbose", "max_iter", "epsilon" }); + + if (args.contains("C")) { + self.svm_->set_params(plssvm::cost = args["C"].cast()); + } + if (args.contains("kernel")) { + const auto kernel_str = args["kernel"].cast(); + plssvm::kernel_function_type kernel{}; + if (kernel_str == "linear") { + kernel = plssvm::kernel_function_type::linear; + } else if 
(kernel_str == "poly" || kernel_str == "polynomial") { + kernel = plssvm::kernel_function_type::polynomial; + } else if (kernel_str == "rbf") { + kernel = plssvm::kernel_function_type::rbf; + } else if (kernel_str == "sigmoid") { + kernel = plssvm::kernel_function_type::sigmoid; + } else if (kernel_str == "laplacian") { + kernel = plssvm::kernel_function_type::laplacian; + } else if (kernel_str == "chi_squared" || kernel_str == "chi-squared") { + kernel = plssvm::kernel_function_type::chi_squared; + } else if (kernel_str == "precomputed") { + throw py::value_error{ R"(The "kernel = 'precomputed'" parameter for the 'SVR' is not implemented yet!)" }; + } else { + throw py::value_error{ fmt::format("'{}' is not in list", kernel_str) }; + } + self.svm_->set_params(plssvm::kernel_type = kernel); + } + if (args.contains("degree")) { + self.svm_->set_params(plssvm::degree = args["degree"].cast()); + } + if (args.contains("gamma")) { + const plssvm::gamma_type gamma = plssvm::bindings::python::util::convert_gamma_kwarg_to_variant(args); + if (std::holds_alternative(gamma)) { + self.svm_->set_params(plssvm::gamma = std::get(gamma)); + } else { + self.svm_->set_params(plssvm::gamma = std::get(gamma)); + } + } + if (args.contains("coef0")) { + self.svm_->set_params(plssvm::coef0 = args["coef0"].cast()); + } + if (args.contains("shrinking")) { + throw py::value_error{ "The 'shrinking' parameter for the 'SVR' is not implemented yet!" }; + } + if (args.contains("tol")) { + self.epsilon_ = args["tol"].cast(); + } + if (args.contains("cache_size")) { + throw py::value_error{ "The 'cache_size' parameter for the 'SVR' is not implemented yet!" 
}; + } + if (args.contains("verbose")) { + if (args["verbose"].cast()) { + if (plssvm::verbosity == plssvm::verbosity_level::quiet) { + // if current verbosity is quiet, override with full verbosity, since 'verbose=TRUE' should never result in no output + plssvm::verbosity = plssvm::verbosity_level::full; + } + // otherwise: use currently active verbosity level + } else { + plssvm::verbosity = plssvm::verbosity_level::quiet; + } + } + if (args.contains("max_iter")) { + const auto max_iter = args["max_iter"].cast(); + if (max_iter > 0) { + // use provided value + self.max_iter_ = static_cast(max_iter); + } else if (max_iter == -1) { + // -1 requests the default behavior in PLSSVM -> drop any limit stored by an earlier call (e.g., via set_params) + self.max_iter_ = std::nullopt; + } else { + // invalid max_iter provided + throw py::value_error{ fmt::format("max_iter must either be greater than zero or -1, got {}!", max_iter) }; + } + } + if (args.contains("epsilon")) { + throw py::value_error{ "The 'epsilon' parameter for the 'SVR' is not implemented yet!" }; + } +} + +void fit(svr &self) { + // perform sanity checks + if (self.svm_->get_params().cost <= plssvm::real_type{ 0.0 }) { + throw py::value_error{ "C <= 0" }; + } + if (self.svm_->get_params().degree < 0) { + throw py::value_error{ "degree of polynomial kernel < 0" }; + } + if (self.epsilon_.has_value() && self.epsilon_.value() <= plssvm::real_type{ 0.0 }) { + throw py::value_error{ "eps <= 0" }; + } + + // fit the model using potentially provided keyword arguments + std::visit([&](auto &&data) { + using possible_model_types = typename svr::possible_model_types; + + if (self.epsilon_.has_value() && self.max_iter_.has_value()) { + self.model_ = std::make_unique(self.svm_->fit(data, + plssvm::epsilon = self.epsilon_.value(), + plssvm::max_iter = self.max_iter_.value())); + } else if (self.epsilon_.has_value()) { + self.model_ = std::make_unique(self.svm_->fit(data, + plssvm::epsilon = self.epsilon_.value())); + } else if (self.max_iter_.has_value()) { + self.model_ = std::make_unique(self.svm_->fit(data, +
plssvm::max_iter = self.max_iter_.value())); + } else { + self.model_ = std::make_unique(self.svm_->fit(data)); + } + }, + *self.data_); +} + +} // namespace + +void init_sklearn_svr(py::module_ &m) { + // documentation based on sklearn.svm.SVR documentation + py::class_ py_svr(m, "SVR", py::dynamic_attr(), "A C-SVR implementation adhering to sklearn.svm.SVR using PLSSVM as backend."); + py_svr.def(py::init([](const py::kwargs &args) { + // to silence constructor messages + if (args.contains("verbose")) { + if (args["verbose"].cast()) { + if (plssvm::verbosity == plssvm::verbosity_level::quiet) { + // if current verbosity is quiet, override with full verbosity, since 'verbose=TRUE' should never result in no output + plssvm::verbosity = plssvm::verbosity_level::full; + } + // otherwise: use currently active verbosity level + } else { + plssvm::verbosity = plssvm::verbosity_level::quiet; + } + } else { + // sklearn default is quiet + plssvm::verbosity = plssvm::verbosity_level::quiet; + } + + // create SVR class + auto self = std::make_unique(); + parse_provided_kwargs(*self, args); + return self; + }), + "Construct a new SVR regressor."); + + //*************************************************************************************************************************************// + // ATTRIBUTES // + //*************************************************************************************************************************************// + py_svr + .def_property_readonly("coef_", [](const svr &self) -> py::array { + if (self.model_ == nullptr) { + throw py::attribute_error{ "'SVR' object has no attribute 'coef_'" }; + } + if (self.svm_->get_params().kernel_type != plssvm::kernel_function_type::linear) { + throw py::attribute_error{ "coef_ is only available when using a linear kernel" }; + } + + return py::cast(self.get_w_ptr()); }, "Weights assigned to the features when kernel=\"linear\".
ndarray of shape (n_features, n_classes)") + .def_property_readonly("dual_coef_", [](const svr &) { throw py::attribute_error{ "'SVR' object has no attribute 'dual_coef_' (not implemented)" }; }, "Dual coefficients of the support vector in the decision function. ndarray of shape (1, n_SV)") + .def_property_readonly("fit_status_", [](const svr &self) -> int { + if (self.model_ == nullptr) { + throw py::attribute_error{ "'SVR' object has no attribute 'fit_status_'" }; + } + + return 0; }, "0 if correctly fitted, 1 otherwise (will raise exception). int") + .def_property_readonly("intercept_", [](const svr &self) -> py::array { + if (self.model_ == nullptr) { + throw py::attribute_error{ "'SVR' object has no attribute 'intercept_'" }; + } + + return std::visit([&](auto &&model) { return plssvm::bindings::python::util::vector_to_pyarray(model.rho()); }, *self.model_); }, "Constants in decision function. ndarray of shape (1,)") + .def_property_readonly("n_features_in_", [](const svr &self) -> int { + if (self.model_ == nullptr) { + throw py::attribute_error{ "'SVR' object has no attribute 'n_features_in_'" }; + } + + return static_cast(std::visit([](auto &&data) { return data.num_features(); }, *self.data_)); }, "Number of features seen during fit. int") + .def_property_readonly("feature_names_in_", [](const svr &self) { + if (!self.feature_names_.has_value()) { + throw py::attribute_error{ "'SVR' object has no attribute 'feature_names_in_'" }; + } + + return plssvm::bindings::python::util::vector_to_pyarray(self.feature_names_.value()); }, "Names of features seen during fit. ndarray of shape (n_features_in_,)") + .def_property_readonly("n_iter_", [](const svr &self) -> int { + if (self.model_ == nullptr) { + throw py::attribute_error{ "'SVR' object has no attribute 'support_'" }; + } + + return std::visit([](auto &&model) { return static_cast(model.num_iters().value().front()); }, *self.model_); }, "Number of iterations run by the optimization routine to fit the model. 
int") + .def_property_readonly("support_", [](const svr &self) -> py::array { + if (self.model_ == nullptr) { + throw py::attribute_error{ "'SVR' object has no attribute 'support_'" }; + } + + // for the SVR, the indices do not need to be sorted + std::vector support(std::visit([](auto &&model) { return model.num_support_vectors(); }, *self.model_)); + std::iota(support.begin(), support.end(), 0); + return plssvm::bindings::python::util::vector_to_pyarray(support); }, "Indices of support vectors. ndarray of shape (n_SV)") + .def_property_readonly("support_vectors_", [](const svr &self) -> py::array { + if (self.model_ == nullptr) { + throw py::attribute_error{ "'SVR' object has no attribute 'support_vectors_'" }; + } + + // for the SVR, the support vectors do not need to be sorted + return std::visit([](auto &&model) { return py::cast(model.support_vectors()); }, *self.model_); }, "Support vectors. ndarray of shape (n_SV, n_features)") + .def_property_readonly("n_support_", [](const svr &self) -> py::array { + if (self.model_ == nullptr) { + throw py::attribute_error{ "'SVR' object has no attribute 'n_support_'" }; + } + + return std::visit([](auto &&model) { return plssvm::bindings::python::util::vector_to_pyarray(std::vector{ static_cast(model.num_support_vectors()) }); }, *self.model_); }, "Number of support vectors for each class. ndarray of shape (1,), dtype=int32") + .def_property_readonly("shape_fit_", [](const svr &self) { + if (self.model_ == nullptr) { + throw py::attribute_error{ "'SVR' object has no attribute 'shape_fit_'" }; + } + + return std::visit([](auto &&data) { return std::make_tuple(static_cast(data.num_data_points()), static_cast(data.num_features())); }, *self.data_); }, "Array dimensions of training vector X. tuple of int of shape (n_dimensions_of_X,)") + .def_property_readonly("_estimator_type", [](const svr &) { return "regressor"; }, "The type of estimator. 
Always 'regressor' for SVR."); + + //*************************************************************************************************************************************// + // METHODS // + //*************************************************************************************************************************************// + py_svr + .def("fit", [](svr &self, plssvm::bindings::python::util::soa_matrix_wrapper data, plssvm::bindings::python::util::label_vector_wrapper labels, const std::optional> &sample_weight) -> svr & { + if (sample_weight.has_value()) { + throw py::attribute_error{ "The 'sample_weight' parameter for a call to 'fit' is not implemented yet!" }; + } + + // store the used label type + self.py_dtype_ = labels.dtype; + + // retrieve the potential feature names + self.feature_names_ = std::move(data.feature_names); + + // create the data set to fit + std::visit([&](auto &&labels_vector) { + // get the label type and possible data set types + using label_type = typename plssvm::detail::remove_cvref_t::value_type; + using possible_data_set_types = typename svr::possible_data_set_types; + // create the data set to fit + self.data_ = std::make_unique(plssvm::regression_data_set(std::move(data.matrix), std::move(labels_vector))); + }, + labels.labels); + + // fit the model using potentially provided keyword arguments + fit(self); + return self; }, "Fit the SVM model according to the given training data.", py::arg("X"), py::arg("y"), py::pos_only(), py::arg("sample_weight") = std::nullopt, py::return_value_policy::reference) + .def("get_metadata_routing", [](const svr &) { throw py::attribute_error{ "'SVR' object has no function 'get_metadata_routing' (not implemented)" }; }, "Get metadata routing of this object.") + .def("get_params", &svr::get_params, "Get parameters for this estimator.", py::arg("deep") = true) + .def("predict", [](svr &self, plssvm::soa_matrix data) -> py::array { + if (self.model_ == nullptr) { + throw py::attribute_error{ "This SVR 
instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator." }; + } + + return std::visit([&](auto &&model) { + // get the label type + using label_type = typename plssvm::detail::remove_cvref_t::label_type; + // create the data set to predict + const plssvm::regression_data_set data_to_predict{ std::move(data) }; + // predict the data + return plssvm::bindings::python::util::vector_to_pyarray(self.svm_->predict(model, data_to_predict)); + }, *self.model_); }, "Perform classification on samples in X.") + .def("score", [](svr &self, plssvm::soa_matrix data, plssvm::bindings::python::util::label_vector_wrapper labels, const std::optional> &sample_weight) -> plssvm::real_type { + if (sample_weight.has_value()) { + throw py::attribute_error{ "The 'sample_weight' parameter for a call to 'fit' is not implemented yet!" }; + } + if (self.model_ == nullptr) { + throw py::attribute_error{ "This SVR instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator." }; + } + + // score the data + return std::visit([&](auto &&labels_vector) { + // get the label types + using label_type = typename plssvm::detail::remove_cvref_t::value_type; + // create the data set to score + const plssvm::regression_data_set data_to_score{ std::move(data), std::move(labels_vector) }; + // score the data + try { + return self.svm_->score(std::get>(*self.model_), data_to_score); + } catch (const std::exception &) { + throw py::value_error{ fmt::format("The dtype of the labels to score is \"{}\", but the model was fitted with \"{}\". 
Please use the same types for fit and score!", labels.dtype.attr("name").cast(), self.py_dtype_.attr("name").cast()) }; + } + }, labels.labels); }, "Return the mean accuracy on the given test data and labels.", py::arg("X"), py::arg("y"), py::pos_only(), py::arg("sample_weight") = std::nullopt) + .def("set_fit_request", [](const svr &) { throw py::attribute_error{ "'SVR' object has no function 'set_fit_request' (not implemented)" }; }, "Request metadata passed to the fit method.") + .def("set_params", [](svr &self, const py::kwargs &args) -> svr & { + parse_provided_kwargs(self, args); + return self; }, "Set the parameters of this estimator.", py::return_value_policy::reference) + .def("set_score_request", [](const svr &) { throw py::attribute_error{ "'SVR' object has no function 'set_score_request' (not implemented)" }; }, "Request metadata passed to the score method.") + .def("__sklearn_is_fitted__", [](const svr &self) -> bool { return self.model_ != nullptr; }, "Return True if the estimator is fitted, False otherwise.") + .def("__sklearn_clone__", [](const svr &self) -> svr { + // create a new SVR instance + svr new_svr{}; + // copy the parameters + new_svr.svm_->set_params(self.svm_->get_params()); + new_svr.py_dtype_ = self.py_dtype_; + new_svr.epsilon_ = self.epsilon_; + new_svr.max_iter_ = self.max_iter_; + return new_svr; }, "Clone the estimator.") + .def("__repr__", [](const svr &self) { + // get the currently used parameters + py::dict used_params = self.get_params(true); + py::dict default_params = svr{}.get_params(true); + + std::vector non_default_values{}; + + // iterate over all available keys and check if the currently used one differs from the default one + for (auto item : used_params) { + const auto key = item.first.cast(); + + // get the values as string + const std::string used_param_str = py::str(used_params[key.c_str()]); + const std::string default_param_str = py::str(default_params[key.c_str()]); + + // check if the parameter values are 
identical, if not, add them to the vector + if (used_param_str != default_param_str) { + if (py::isinstance(used_params[key.c_str()])) { + non_default_values.push_back(fmt::format("{}='{}'", key, used_param_str)); + } else { + non_default_values.push_back(fmt::format("{}={}", key, used_param_str)); + } + } + } + + return fmt::format("plssvm.SVR({})", fmt::join(non_default_values, ", ")); }, "Print the SVR showing all non-default parameters."); +} diff --git a/bindings/Python/solver_types.cpp b/bindings/Python/solver_types.cpp index b175eb1f3..1c568c238 100644 --- a/bindings/Python/solver_types.cpp +++ b/bindings/Python/solver_types.cpp @@ -14,7 +14,7 @@ namespace py = pybind11; void init_solver_types(py::module_ &m) { // bind enum class - py::enum_(m, "SolverType") + py::enum_(m, "SolverType", "Enum class for all possible solver types implemented in PLSSVM.") .value("AUTOMATIC", plssvm::solver_type::automatic, "the default solver type; depends on the available device and system memory") .value("CG_EXPLICIT", plssvm::solver_type::cg_explicit, "explicitly assemble the kernel matrix on the device") .value("CG_IMPLICIT", plssvm::solver_type::cg_implicit, "implicitly calculate the kernel matrix entries in each CG iteration"); diff --git a/bindings/Python/svm/csvc.cpp b/bindings/Python/svm/csvc.cpp new file mode 100644 index 000000000..af8bc16e6 --- /dev/null +++ b/bindings/Python/svm/csvc.cpp @@ -0,0 +1,118 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ */ + +#include "plssvm/svm/csvc.hpp" // plssvm::csvc + +#include "plssvm/classification_types.hpp" // plssvm::classification_type +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/data_set/classification_data_set.hpp" // plssvm::classification_data_set +#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t +#include "plssvm/model/classification_model.hpp" // plssvm::classification_model +#include "plssvm/parameter.hpp" // plssvm::parameter, named parameters +#include "plssvm/solver_types.hpp" // plssvm::solver_type + +#include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::classification_data_set_wrapper +#include "bindings/Python/model/variant_wrapper.hpp" // plssvm::bindings::python::util::classification_model_wrapper +#include "bindings/Python/svm/utility.hpp" // plssvm::bindings::python::util::assemble_csvm +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, python_type_name_mapping, vector_to_pyarray} + +#include "fmt/format.h" // fmt::format +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::kwargs, py::value_error +#include "pybind11/stl.h" // support for STL types: std::vector + +#include // std::exception +#include // std::string_view +#include // std::visit, std::get + +namespace py = pybind11; + +void init_csvc(py::module_ &m, py::module_ &pure_virtual) { + using plssvm::bindings::python::util::classification_data_set_wrapper; + using plssvm::bindings::python::util::classification_model_wrapper; + + const py::class_ py_csvc(pure_virtual, "__pure_virtual_base_CSVC"); + + // bind plssvm::make_csvm factory functions to "generic" Python C-SVC class + py::class_(m, "CSVC", py_csvc, py::module_local(), "Base class for all backend C-SVC implementations.") + // IMPLICIT BACKEND + .def(py::init([](const py::kwargs &args) { + return plssvm::bindings::python::util::assemble_csvm(args); + }), + "create an C-SVC 
with the provided keyword arguments") + .def(py::init([](const plssvm::parameter ¶ms, const py::kwargs &args) { + return plssvm::bindings::python::util::assemble_csvm(args, params); + }), + "create an C-SVC with the provided parameters and keyword arguments; the values in params will be overwritten by the keyword arguments") + // clang-format off + .def("fit", [](const plssvm::csvc &self, const classification_data_set_wrapper &data_set, const py::kwargs &args) -> classification_model_wrapper { + return std::visit([&](auto &&data) { + // check keyword arguments + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "epsilon", "max_iter", "classification", "solver" }); + + auto epsilon{ plssvm::real_type{ 1e-10 } }; + if (args.contains("epsilon")) { + epsilon = args["epsilon"].cast(); + } + + // can't do it with max_iter due to OAO splitting the data set + + plssvm::classification_type classification{ plssvm::classification_type::oaa }; + if (args.contains("classification")) { + classification = args["classification"].cast(); + } + + plssvm::solver_type solver{ plssvm::solver_type::automatic }; + if (args.contains("solver")) { + solver = args["solver"].cast(); + } + + if (args.contains("max_iter")) { + return classification_model_wrapper{ self.fit(data, + plssvm::epsilon = epsilon, + plssvm::max_iter = args["max_iter"].cast(), + plssvm::classification = classification, + plssvm::solver = solver) }; + } else { + return classification_model_wrapper{ self.fit(data, + plssvm::epsilon = epsilon, + plssvm::classification = classification, + plssvm::solver = solver) }; + } + }, data_set.data_set); }, "fit a model using the current C-SVC on the provided data") + .def("predict", [](const plssvm::csvc &self, const classification_model_wrapper &trained_model, const classification_data_set_wrapper &data_set) { + return std::visit([&](auto &&model) { + using label_type = typename plssvm::detail::remove_cvref_t::label_type; + try { + return 
plssvm::bindings::python::util::vector_to_pyarray(self.predict(model, std::get>(data_set.data_set))); + } catch (const std::exception &) { + using plssvm::bindings::python::util::python_type_name_mapping; + const std::string_view data_set_label_type = std::visit([](auto &&data) { + return python_type_name_mapping::label_type>(); + }, data_set.data_set); + throw py::value_error{ fmt::format("Mismatching label types! Trained the model with {}, but tried to predict it with {}.", python_type_name_mapping(), data_set_label_type) }; + } + }, trained_model.model); }, "predict the labels for a data set using a previously learned model") + .def("score", [](const plssvm::csvc &self, const classification_model_wrapper &trained_model) { + return std::visit([&](auto &&model) { + return self.score(model); + }, trained_model.model); }, "calculate the accuracy of the model") + .def("score", [](const plssvm::csvc &self, const classification_model_wrapper &trained_model, const classification_data_set_wrapper &data_set) { + return std::visit([&](auto &&model) { + using label_type = typename plssvm::detail::remove_cvref_t::label_type; + try { + return self.score(model, std::get>(data_set.data_set)); + } catch (const std::exception &) { + using plssvm::bindings::python::util::python_type_name_mapping; + const std::string_view data_set_label_type = std::visit([](auto &&data) { + return python_type_name_mapping::label_type>(); + }, data_set.data_set); + throw py::value_error{ fmt::format("Mismatching label types! 
Trained the model with {}, but tried to score it with {}.", python_type_name_mapping(), data_set_label_type) }; + } + }, trained_model.model); }, "calculate the accuracy of the model"); + // clang-format on +} diff --git a/bindings/Python/svm/csvm.cpp b/bindings/Python/svm/csvm.cpp new file mode 100644 index 000000000..88494ffef --- /dev/null +++ b/bindings/Python/svm/csvm.cpp @@ -0,0 +1,30 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "plssvm/svm/csvm.hpp" // plssvm::csvm + +#include "plssvm/parameter.hpp" // plssvm::parameter, named parameters + +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter} + +#include "pybind11/pybind11.h" // py::module_, py::class_, py::kwargs + +namespace py = pybind11; + +void init_csvm(py::module_ &pure_virtual) { + py::class_(pure_virtual, "__pure_virtual_base_CSVM", "Base class for all other C-SVC or C-SVR implementations.") + .def("get_params", &plssvm::csvm::get_params, "get the hyper-parameters used for this C-SVM") + .def("set_params", [](plssvm::csvm &self, const plssvm::parameter ¶ms) { self.set_params(params); }, "update the hyper-parameters used for this C-SVM using a plssvm.Parameter object") + .def("set_params", [](plssvm::csvm &self, const py::kwargs &args) { + // check keyword arguments + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); + // convert kwargs to parameter and update csvm internal parameter + self.set_params(plssvm::bindings::python::util::convert_kwargs_to_parameter(args, self.get_params())); }, "update the hyper-parameters used for this C-SVM using keyword arguments") + 
.def("get_target_platform", &plssvm::csvm::get_target_platform, "get the actual target platform this C-SVM runs on") + .def("num_available_devices", &plssvm::csvm::num_available_devices, "get the number of available devices for the current C-SVM"); +} diff --git a/bindings/Python/svm/csvr.cpp b/bindings/Python/svm/csvr.cpp new file mode 100644 index 000000000..f9e05378e --- /dev/null +++ b/bindings/Python/svm/csvr.cpp @@ -0,0 +1,107 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "plssvm/svm/csvr.hpp" // plssvm::csvr + +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/data_set/regression_data_set.hpp" // plssvm::regression_data_set +#include "plssvm/model/regression_model.hpp" // plssvm::regression_model +#include "plssvm/parameter.hpp" // plssvm::parameter, named parameters +#include "plssvm/solver_types.hpp" // plssvm::solver_type + +#include "bindings/Python/data_set/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_data_set_wrapper +#include "bindings/Python/model/variant_wrapper.hpp" // plssvm::bindings::python::util::regression_model_wrapper +#include "bindings/Python/svm/utility.hpp" // plssvm::bindings::python::util::assemble_csvm +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, python_type_name_mapping, vector_to_pyarray} + +#include "fmt/format.h" // fmt::format +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init, py::kwargs, py::value_error +#include "pybind11/stl.h" // support for STL types: std::vector + +#include // std::exception +#include // std::string_view +#include // std::visit, std::get + +namespace py = pybind11; + +void init_csvr(py::module_ &m, py::module_ 
&pure_virtual) { + using plssvm::bindings::python::util::regression_data_set_wrapper; + using plssvm::bindings::python::util::regression_model_wrapper; + + const py::class_ py_csvr(pure_virtual, "__pure_virtual_base_CSVR"); + + // bind plssvm::make_csvm factory functions to "generic" Python C-SVR class + py::class_(m, "CSVR", py_csvr, py::module_local(), "Base class for all backend C-SVR implementations.") + // IMPLICIT BACKEND + .def(py::init([](const py::kwargs &args) { + return plssvm::bindings::python::util::assemble_csvm(args); + }), + "create an C-SVR with the provided keyword arguments") + .def(py::init([](const plssvm::parameter ¶ms, const py::kwargs &args) { + return plssvm::bindings::python::util::assemble_csvm(args, params); + }), + "create an C-SVR with the provided parameters and keyword arguments; the values in params will be overwritten by the keyword arguments") + // clang-format off + .def("fit", [](const plssvm::csvr &self, const regression_data_set_wrapper &data_set, const py::kwargs &args) { + return std::visit([&](auto &&data) { + // check keyword arguments + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "epsilon", "max_iter", "solver" }); + + auto epsilon{ plssvm::real_type{ 1e-10 } }; + if (args.contains("epsilon")) { + epsilon = args["epsilon"].cast(); + } + + plssvm::solver_type solver{ plssvm::solver_type::automatic }; + if (args.contains("solver")) { + solver = args["solver"].cast(); + } + + if (args.contains("max_iter")) { + return regression_model_wrapper{ self.fit(data, + plssvm::epsilon = epsilon, + plssvm::max_iter = args["max_iter"].cast(), + plssvm::solver = solver) }; + } else { + return regression_model_wrapper{ self.fit(data, + plssvm::epsilon = epsilon, + plssvm::solver = solver) }; + } + }, data_set.data_set); }, "fit a model using the current C-SVR on the provided data") + .def("predict", [](const plssvm::csvr &self, const regression_model_wrapper &trained_model, const regression_data_set_wrapper 
&data_set) { + return std::visit([&](auto &&model) { + using label_type = typename plssvm::detail::remove_cvref_t::label_type; + try { + return plssvm::bindings::python::util::vector_to_pyarray(self.predict(model, std::get>(data_set.data_set))); + } catch (const std::exception &) { + using plssvm::bindings::python::util::python_type_name_mapping; + const std::string_view data_set_label_type = std::visit([](auto &&data) { + return python_type_name_mapping::label_type>(); + }, data_set.data_set); + throw py::value_error{ fmt::format("Mismatching label types! Trained the model with {}, but tried to predict it with {}.", python_type_name_mapping(), data_set_label_type) }; + } + }, trained_model.model); }, "predict the labels for a data set using a previously learned model") + .def("score", [](const plssvm::csvr &self, const regression_model_wrapper &trained_model) { + return std::visit([&](auto &&model) { + return self.score(model); + }, trained_model.model); }, "calculate the accuracy of the model") + .def("score", [](const plssvm::csvr &self, const regression_model_wrapper &trained_model, const regression_data_set_wrapper &data_set) { + return std::visit([&](auto &&model) { + using label_type = typename plssvm::detail::remove_cvref_t::label_type; + try { + return self.score(model, std::get>(data_set.data_set)); + } catch (const std::exception &) { + using plssvm::bindings::python::util::python_type_name_mapping; + const std::string_view data_set_label_type = std::visit([](auto &&data) { + return python_type_name_mapping::label_type>(); + }, data_set.data_set); + throw py::value_error{ fmt::format("Mismatching label types! 
Trained the model with {}, but tried to score it with {}.", python_type_name_mapping(), data_set_label_type) }; + } + }, trained_model.model); }, "calculate the accuracy of the model"); + // clang-format on +} diff --git a/bindings/Python/svm/utility.hpp b/bindings/Python/svm/utility.hpp new file mode 100644 index 000000000..3cedd5cb6 --- /dev/null +++ b/bindings/Python/svm/utility.hpp @@ -0,0 +1,101 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Utility functions used for creating the Pybind11 Python bindings for the C-SVM classes. + */ + +#ifndef PLSSVM_BINDINGS_PYTHON_SVM_UTILITY_HPP_ +#define PLSSVM_BINDINGS_PYTHON_SVM_UTILITY_HPP_ +#pragma once + +#include "plssvm/backend_types.hpp" // plssvm::backend_type, plssvm::determine_default_backend, plssvm::list_available_backends +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::implementation_type +#include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type +#include "plssvm/csvm_factory.hpp" // plssvm::make_csvm +#include "plssvm/parameter.hpp" // plssvm::parameter, named parameters +#include "plssvm/target_platforms.hpp" // plssvm::target_platform, plssvm::determine_default_target_platform, plssvm::list_available_target_platforms + +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{check_kwargs_for_correctness, convert_kwargs_to_parameter} + +#include "pybind11/pybind11.h" // py::kwargs, py::instance, py::str, py::value_error + +#include // std::unique_ptr +#include // std::istringstream +#include // std::string + +namespace py = pybind11; + +namespace 
plssvm::bindings::python::util { + +/** + * @brief Assemble a C-SVM (C-SVC or C-SVR based on the template parameter @p csvm_type) using the named Python arguments @p args and PLSSVM parameters @p input_params. + * @tparam csvm_type the type of the C-SVM to create + * @param[in] args the named Python arguments + * @param[in] input_params the PLSSVM parameter + * @return the created C-SVM (`[[nodiscard]]`) + */ +template +[[nodiscard]] inline std::unique_ptr assemble_csvm(const py::kwargs &args, plssvm::parameter input_params = {}) { + // check keyword arguments + plssvm::bindings::python::util::check_kwargs_for_correctness(args, { "backend", "target_platform", "kernel_type", "degree", "gamma", "coef0", "cost", "sycl_implementation_type", "sycl_kernel_invocation_type", "kokkos_execution_space" }); + // if one of the value keyword parameter is provided, set the respective value + const plssvm::parameter params = plssvm::bindings::python::util::convert_kwargs_to_parameter(args, input_params); + plssvm::backend_type backend = plssvm::determine_default_backend(); + if (args.contains("backend")) { + if (py::isinstance(args["backend"])) { + std::istringstream iss{ args["backend"].cast() }; + iss >> backend; + if (iss.fail()) { + throw py::value_error{ fmt::format("Available backends are \"{}\", got {}!", fmt::join(plssvm::list_available_backends(), ";"), args["backend"].cast()) }; + } + } else { + backend = args["backend"].cast(); + } + } + plssvm::target_platform target = plssvm::determine_default_target_platform(); + if (args.contains("target_platform")) { + if (py::isinstance(args["target_platform"])) { + std::istringstream iss{ args["target_platform"].cast() }; + iss >> target; + if (iss.fail()) { + throw py::value_error{ fmt::format("Available target platforms are \"{}\", got {}!", fmt::join(plssvm::list_available_target_platforms(), ";"), args["target_platform"].cast()) }; + } + } else { + target = args["target_platform"].cast(); + } + } + + if (backend == 
plssvm::backend_type::sycl) { + // parse SYCL specific keyword arguments + plssvm::sycl::implementation_type impl_type = plssvm::sycl::implementation_type::automatic; + if (args.contains("sycl_implementation_type")) { + impl_type = args["sycl_implementation_type"].cast(); + } + plssvm::sycl::kernel_invocation_type invocation_type = plssvm::sycl::kernel_invocation_type::automatic; + if (args.contains("sycl_kernel_invocation_type")) { + invocation_type = args["sycl_kernel_invocation_type"].cast(); + } + + return plssvm::make_csvm(backend, target, params, plssvm::sycl_implementation_type = impl_type, plssvm::sycl_kernel_invocation_type = invocation_type); + } else if (backend == plssvm::backend_type::kokkos) { + // parse Kokkos specific keyword arguments + plssvm::kokkos::execution_space space = plssvm::kokkos::execution_space::automatic; + if (args.contains("kokkos_execution_space")) { + space = args["kokkos_execution_space"].cast(); + } + + return plssvm::make_csvm(backend, target, params, plssvm::kokkos_execution_space = space); + } else { + return plssvm::make_csvm(backend, target, params); + } +} + +} // namespace plssvm::bindings::python::util + +#endif // PLSSVM_BINDINGS_PYTHON_SVM_UTILITY_HPP_ diff --git a/bindings/Python/svm_types.cpp b/bindings/Python/svm_types.cpp new file mode 100644 index 000000000..8bf4eaee6 --- /dev/null +++ b/bindings/Python/svm_types.cpp @@ -0,0 +1,25 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ */ + +#include "plssvm/svm_types.hpp" // plssvm::svm_type, plssvm::list_available_svm_types, plssvm::svm_type_from_model_file + +#include "pybind11/pybind11.h" // py::module_ + +namespace py = pybind11; + +void init_svm_types(py::module_ &m) { + // bind enum class + py::enum_(m, "SVMType", "Enum class for all implemented SVM types in PLSSVM.") + .value("CSVC", plssvm::svm_type::csvc, "use a C-SVC for classification") + .value("CSVR", plssvm::svm_type::csvr, "use a C-SVR for regression"); + + // bind free functions + m.def("list_available_svm_types", &plssvm::list_available_svm_types, "list the available SVM types"); + m.def("svm_type_to_task_name", &plssvm::svm_type_to_task_name, "get the task name (e.g., \"classification\" or \"regression\") based on the provided SVMType"); + m.def("svm_type_from_model_file", &plssvm::svm_type_from_model_file, "determine the SVMType based on the provided LIBSVM model file"); +} diff --git a/bindings/Python/target_platforms.cpp b/bindings/Python/target_platforms.cpp index 1abce3833..36804bf84 100644 --- a/bindings/Python/target_platforms.cpp +++ b/bindings/Python/target_platforms.cpp @@ -15,7 +15,7 @@ namespace py = pybind11; void init_target_platforms(py::module_ &m) { // bind enum class - py::enum_(m, "TargetPlatform") + py::enum_(m, "TargetPlatform", "Enum class for all possible targets that PLSSVM supports.") .value("AUTOMATIC", plssvm::target_platform::automatic, "the default target with respect to the used backend type; checks for available devices in the following order: NVIDIA GPUs -> AMD GPUs -> Intel GPUs -> CPUs") .value("CPU", plssvm::target_platform::cpu, "target CPUs only (Intel, AMD, IBM, ...)") .value("GPU_NVIDIA", plssvm::target_platform::gpu_nvidia, "target GPUs from NVIDIA") diff --git a/bindings/Python/type_caster/label_vector_wrapper_caster.hpp b/bindings/Python/type_caster/label_vector_wrapper_caster.hpp new file mode 100644 index 000000000..0cbe37fa5 --- /dev/null +++
b/bindings/Python/type_caster/label_vector_wrapper_caster.hpp @@ -0,0 +1,340 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Implements a custom type caster for a label_vector_wrapper (storing a std::vector and py::type). + */ + +#ifndef PLSSVM_BINDINGS_PYTHON_TYPE_CASTER_LABEL_VECTOR_WRAPPER_TYPE_CASTER_HPP_ +#define PLSSVM_BINDINGS_PYTHON_TYPE_CASTER_LABEL_VECTOR_WRAPPER_TYPE_CASTER_HPP_ +#pragma once + +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{vector_to_pyarray, is_pandas_data_frame, is_pandas_series} + +#include "fmt/format.h" // fmt::format +#include "pybind11/cast.h" // pybind11::detail::type_caster +#include "pybind11/numpy.h" // py::array, py::array_t, py::array::c_style, py::array::f_style, py::buffer_info +#include "pybind11/pybind11.h" // py::isinstance, py::value_error, py::list, py::hash, py::handle, py::cast, py::len, py::str, py::module_, py::object +#include "pybind11/pytypes.h" // py::dtype + +#include // std::size_t +#include // fixed-width integers +#include // std::string +#include // std::is_same_v +#include // std::unordered_map +#include // std::pair, std::make_pair, std::move +#include // std::variant, std::visit +#include // std::vector + +namespace py = pybind11; + +namespace plssvm::bindings::python::util { + +namespace impl { + +/** + * @brief A hash struct for creating the hash value of a py::type. + */ +struct py_type_hash { + std::size_t operator()(const py::type &t) const { + return py::hash(t); + } +}; + +/** + * @brief A comparison struct to check two py::type for equality. 
+ */ +struct py_type_equal { + bool operator()(const py::type &lhs, const py::type &rhs) const { + return lhs.is(rhs); + } +}; + +} // namespace impl + +/** + * @brief Convert a Python Numpy array to a `std::vector`. + * @tparam T the type in the array + * @param[in] arr the Python Numpy array to convert + * @return the `std::vector` (`[[nodiscard]]`) + */ +template +[[nodiscard]] std::vector pyarray_to_vector(const py::array &arr) { + // check dimensions + if (arr.ndim() != 1) { + throw py::value_error{ fmt::format("The provided array must have exactly one dimension but has {}!", arr.ndim()) }; + } + + if (arr.size() == 0) { + // return an empty vector + return std::vector{}; + } else { + // convert py::array to std::vector + auto arr_t = arr.cast>(); + return std::vector(arr_t.data(0), arr_t.data(0) + arr_t.shape(0)); + } +} + +/** + * @def PLSSVM_CREATE_PYARRAY_TO_VECTOR_MAPPINGS + * @brief Create a mapping from a Python Numpy ndarray with the @p data_type to a std::vector. + */ +#define PLSSVM_CREATE_PYARRAY_TO_VECTOR_MAPPINGS(data_type) \ + if constexpr (detail::is_label_type_in_variant_v) { \ + if (type.equal(py::dtype::of())) { \ + return pyarray_to_vector(arr); \ + } \ + } + +/** + * @brief Convert a generic Python Numpy array to a `std::vector`.
+ * @param[in] arr the generic Python Numpy array to convert + * @return a `std::variant` containing the converted `std::vector` (`[[nodiscard]]`) + */ +template +[[nodiscard]] possible_vector_types generic_pyarray_to_vector(const py::array &arr) { + // sanity check the passed py::array + if (!(arr.flags() & py::array::c_style)) { + throw py::value_error{ "The py::array must be C-contiguous" }; + } + + // the type used in the py::array + py::dtype type = arr.dtype(); + + PLSSVM_CREATE_PYARRAY_TO_VECTOR_MAPPINGS(bool) + PLSSVM_CREATE_PYARRAY_TO_VECTOR_MAPPINGS(std::int8_t) + PLSSVM_CREATE_PYARRAY_TO_VECTOR_MAPPINGS(std::uint8_t) + PLSSVM_CREATE_PYARRAY_TO_VECTOR_MAPPINGS(std::int16_t) + PLSSVM_CREATE_PYARRAY_TO_VECTOR_MAPPINGS(std::uint16_t) + PLSSVM_CREATE_PYARRAY_TO_VECTOR_MAPPINGS(std::int32_t) + PLSSVM_CREATE_PYARRAY_TO_VECTOR_MAPPINGS(std::uint32_t) + PLSSVM_CREATE_PYARRAY_TO_VECTOR_MAPPINGS(std::int64_t) + PLSSVM_CREATE_PYARRAY_TO_VECTOR_MAPPINGS(std::uint64_t) + PLSSVM_CREATE_PYARRAY_TO_VECTOR_MAPPINGS(float) + PLSSVM_CREATE_PYARRAY_TO_VECTOR_MAPPINGS(double) + + if constexpr (detail::is_label_type_in_variant_v) { + if (type.attr("kind").cast() == "U") { + // convert py::array of strings to a std::vector + if (arr.ndim() != 1) { + throw py::value_error{ fmt::format("The provided array must have exactly one dimension but has {}!", arr.ndim()) }; + } + + if (arr.size() == 0) { + // return an empty vector + return std::vector{}; + } else { + std::vector result; + result.reserve(arr.shape(0)); + for (py::handle item : arr) { + result.push_back(py::cast(item)); + } + return result; + } + } + } + + // if we are here, no correct type has been found -> throw exception + throw py::value_error{ fmt::format("Unsupported data type: {}!", type.attr("name").cast()) }; +} + +#undef PLSSVM_CREATE_PYARRAY_TO_VECTOR_MAPPINGS + +/** + * @brief Convert a Python List to a `std::vector`.
+ * @tparam T the types in the `std::vector` + * @param[in] list the Python List to convert + * @return the `std::vector` (`[[nodiscard]]`) + */ +template +[[nodiscard]] std::vector pylist_to_vector(const py::list &list) { + std::vector vec(py::len(list)); + for (std::size_t i = 0; i < vec.size(); ++i) { + if constexpr (std::is_same_v) { + vec[i] = list[i].cast().cast(); + } else { + vec[i] = list[i].cast(); + } + } + return vec; +} + +/** + * @def PLSSVM_CREATE_PYLIST_TO_VECTOR_MAPPINGS + * @brief Create a mapping from a Python list with the @p np_data_type to a std::vector of @p cpp_data_type. + */ +#define PLSSVM_CREATE_PYLIST_TO_VECTOR_MAPPINGS(np_data_type, cpp_data_type) \ + if constexpr (detail::is_label_type_in_variant_v) { \ + if (highest_type.equal(np.attr(np_data_type))) { \ + return std::make_pair(pylist_to_vector(list), py::dtype::of()); \ + } \ + } + +/** + * @brief Convert a Python List to a `std::vector`. The `std::vector<>::value_type` depends on the types provided in the py::list.
+ * @param[in] list the Python List to convert + * @return [a `std::variant` containing the converted `std::vector`, the used py::dtype] (`[[nodiscard]]`) + */ +template +[[nodiscard]] inline std::pair generic_pylist_to_vector(const py::list &list) { + const py::module_ np = py::module_::import("numpy"); + // define a precedence map, i.e., we internally use the type in the py::list with the highest precedence value + // example: [0, 1.3, np.int8(6)] -> the types are [int, float, int8] -> precedences are [8, 10, 2] -> the highest precedence is 10 -> we use float internally + const std::unordered_map precedence_map{ + { py::module_::import("builtins").attr("bool"), 0 }, + { np.attr("uint8"), 1 }, + { np.attr("int8"), 2 }, + { np.attr("uint16"), 3 }, + { np.attr("int16"), 4 }, + { np.attr("uint32"), 5 }, + { np.attr("int32"), 6 }, + { np.attr("uint64"), 7 }, + { np.attr("int64"), 8 }, + { py::module_::import("builtins").attr("int"), 8 }, + { np.attr("float32"), 9 }, + { np.attr("float64"), 10 }, + { py::module_::import("builtins").attr("float"), 10 }, + { py::module_::import("builtins").attr("str"), 11 } + }; + + // get the "super" type used internally as defined by the precedence_map + py::type highest_type{ py::module_::import("builtins").attr("bool") }; + int highest_precedence{ -1 }; + for (std::size_t i = 0; i < py::len(list); ++i) { + py::object item = list[i]; + py::type type = py::type::of(item); + int precedence = precedence_map.at(type); + if (precedence > highest_precedence) { + highest_precedence = precedence; + highest_type = type; + } + } + + // convert the py::list to a vector of the previously determined type + + PLSSVM_CREATE_PYLIST_TO_VECTOR_MAPPINGS("int8", std::int8_t) + PLSSVM_CREATE_PYLIST_TO_VECTOR_MAPPINGS("uint8", std::uint8_t) + PLSSVM_CREATE_PYLIST_TO_VECTOR_MAPPINGS("int16", std::int16_t) + PLSSVM_CREATE_PYLIST_TO_VECTOR_MAPPINGS("uint16", std::uint16_t) + PLSSVM_CREATE_PYLIST_TO_VECTOR_MAPPINGS("int32", std::int32_t) + 
PLSSVM_CREATE_PYLIST_TO_VECTOR_MAPPINGS("uint32", std::uint32_t) + PLSSVM_CREATE_PYLIST_TO_VECTOR_MAPPINGS("int64", std::int64_t) + PLSSVM_CREATE_PYLIST_TO_VECTOR_MAPPINGS("uint64", std::uint64_t) + PLSSVM_CREATE_PYLIST_TO_VECTOR_MAPPINGS("float32", float) + PLSSVM_CREATE_PYLIST_TO_VECTOR_MAPPINGS("float64", double) + + if constexpr (detail::is_label_type_in_variant_v) { + if (highest_type.equal(py::module_::import("builtins").attr("bool"))) { + return std::make_pair(pylist_to_vector(list), py::dtype::of()); + } + } + if constexpr (detail::is_label_type_in_variant_v) { + if (highest_type.equal(py::module_::import("builtins").attr("int"))) { + return std::make_pair(pylist_to_vector(list), py::dtype::of()); + } + } + if constexpr (detail::is_label_type_in_variant_v) { + if (highest_type.equal(py::module_::import("builtins").attr("float"))) { + return std::make_pair(pylist_to_vector(list), py::dtype::of()); + } + } + if constexpr (detail::is_label_type_in_variant_v) { + if (highest_type.equal(py::module_::import("builtins").attr("str"))) { + return std::make_pair(pylist_to_vector(list), py::dtype("U")); + } + } + + // if we are here, no correct type has been found -> throw exception + throw py::value_error{ fmt::format("Unsupported data type: {}!", highest_type.attr("__name__").cast()) }; +} + +#undef PLSSVM_CREATE_PYLIST_TO_VECTOR_MAPPINGS + +/** + * @brief A small wrapper around a std::variant containing all possible label type vectors and the actually used Python dtype. + * @tparam PossibleTypes the possible label vectors stored in a std::variant + */ +template +struct label_vector_wrapper { + /// The labels. + PossibleTypes labels{}; + /// The actually used Python dtype. + py::dtype dtype{}; +}; + +} // namespace plssvm::bindings::python::util + +namespace pybind11::detail { + +/** + * @brief A custom Pybind11 type caster to convert Python object from and to a plssvm::bindings::python::util::label_vector_wrapper. 
+ * @tparam T the value type of the PLSSVM matrix + * @tparam layout the memory layout type of the PLSSVM matrix + */ +template +struct type_caster> { + public: + /// The type of the label vector wrapper to convert from/to. + using label_vector_wrapper_type = plssvm::bindings::python::util::label_vector_wrapper; + + /// Specify the Python type name to which a label_vector_wrapper should be converted. + PYBIND11_TYPE_CASTER(label_vector_wrapper_type, _("numpy.ndarray")); + + /** + * @brief Convert a label_vector_wrapper to a Numpy ndarray. + * @param[in] labels the labels vector to convert to a Python Numpy ndarray + * @return a Pybind11 handle to the Numpy ndarray + */ + static handle cast(const label_vector_wrapper_type &labels, return_value_policy, handle) { + // convert a generic std::vector to a Numpy ndarray + return std::visit([](auto &&vec) { return plssvm::bindings::python::util::vector_to_pyarray(vec).release(); }, labels.labels); + } + + /** + * @brief Try converting a Python object @p obj to a label_vector_wrapper. 
+ * @param[in] obj the object to convert + * @return `true` if the conversion was successful, `false` otherwise + */ + bool load(handle obj, bool) { + if (py::isinstance(obj)) { + // provided obj is a Python list + auto [labels, dtype] = plssvm::bindings::python::util::generic_pylist_to_vector(py::cast(obj)); + value.labels = std::move(labels); + value.dtype = dtype; + } else { + py::array arr{}; + if (py::isinstance(obj)) { + // provided obj is a numpy array + arr = obj.cast(); + } else if (plssvm::bindings::python::util::is_pandas_series(obj)) { + // provided obj is a Pandas Series + arr = obj.attr("values").cast(); + } else if (plssvm::bindings::python::util::is_pandas_data_frame(obj)) { + // provided obj is a Pandas DataFrame + arr = obj.attr("values").cast(); + arr = arr.reshape({ arr.size() }); + } else { + throw py::value_error{ fmt::format("Unsupported data type: {}", std::string{ py::str(obj.get_type().attr("__name__")) }) }; + } + + // sanity check the number of dimensions of the numpy array + if (arr.ndim() != 1) { + throw py::value_error{ fmt::format("Found array with dim {}. SVC expected == 1.", arr.ndim()) }; + } + + // convert the Python Numpy array to a std::vector + value.labels = plssvm::bindings::python::util::generic_pyarray_to_vector(arr); + value.dtype = arr.dtype(); + } + + return true; + } +}; + +} // namespace pybind11::detail + +#endif // PLSSVM_BINDINGS_PYTHON_TYPE_CASTER_LABEL_VECTOR_WRAPPER_TYPE_CASTER_HPP_ diff --git a/bindings/Python/type_caster/matrix_type_caster.hpp b/bindings/Python/type_caster/matrix_type_caster.hpp new file mode 100644 index 000000000..0b664d6ec --- /dev/null +++ b/bindings/Python/type_caster/matrix_type_caster.hpp @@ -0,0 +1,269 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information. + * + * @brief Implements a custom type caster for a plssvm::matrix. + */ + +#ifndef PLSSVM_BINDINGS_PYTHON_TYPE_CASTER_MATRIX_TYPE_CASTER_HPP_ +#define PLSSVM_BINDINGS_PYTHON_TYPE_CASTER_MATRIX_TYPE_CASTER_HPP_ +#pragma once + +#include "plssvm/constants.hpp" // plssvm::PADDING_SIZE +#include "plssvm/detail/string_conversion.hpp" // plssvm::detail::convert_to +#include "plssvm/matrix.hpp" // plssvm::matrix, plssvm::layout_type +#include "plssvm/shape.hpp" // plssvm::shape + +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::{is_pandas_data_frame, is_scipy_sparse_matrix, is_c_contiguous, is_f_contiguous} + +#include "fmt/format.h" // fmt::format +#include "pybind11/cast.h" // pybind11::detail::type_caster +#include "pybind11/gil.h" // py::gil_scoped_release +#include "pybind11/numpy.h" // py::array, py::array_t, py::array::c_style, py::array::f_style +#include "pybind11/pybind11.h" // py::buffer_info, py::isinstance, py::value_error +#include "pybind11/pytypes.h" // py::list, py::str + +#include // std::size_t +#include // std::memcpy +#include // std::string +#include // std::conditional_t + +namespace py = pybind11; + +namespace pybind11::detail { + +/** + * @brief A custom Pybind11 type caster to convert Python object from and to a plssvm::matrix. + * @tparam T the value type of the PLSSVM matrix + * @tparam layout the memory layout type of the PLSSVM matrix + */ +template +struct type_caster> { + public: + /// The type of the matrix to convert from/to. + using matrix_type = plssvm::matrix; + + /// Specify the Python type name to which a plssvm::matrix should be converted. + PYBIND11_TYPE_CASTER(matrix_type, _("numpy.ndarray")); + + /** + * @brief Convert a plssvm::matrix to a Numpy ndarray. 
+ * @details If the PLSSVM matrix's memory layout is AoS, uses a Numpy ndarray with c_style layout, + * if the PLSSVM matrix's memory layout is SoA, uses a Numpy ndarray with f_style layout. + * @param[in] matr the PLSSVM matrix to convert to a Numpy ndarray + * @return a Pybind11 handle to the Numpy ndarray + */ + static handle cast(const matrix_type &matr, return_value_policy, handle) { + const std::size_t num_data_points = matr.num_rows(); + const std::size_t num_features = matr.num_cols(); + + // determine the numpy array type based on the matrix layout + using py_array_type = std::conditional_t, py::array_t>; + + // create the Python numpy array + py_array_type arr({ num_data_points, num_features }); + py::buffer_info buffer = arr.request(); + T *ptr = static_cast(buffer.ptr); + + // check if the provided matrix has padding entries -> must be removed + if (matr.is_padded()) { + // padding entries found -> copy data row-wise to the Python numpy array + if constexpr (layout == plssvm::layout_type::aos) { + for (std::size_t row = 0; row < num_data_points; ++row) { + std::memcpy(ptr + row * num_features, matr.data() + row * matr.num_cols_padded(), num_features * sizeof(T)); + } + } else { + for (std::size_t col = 0; col < num_features; ++col) { + std::memcpy(ptr + col * num_data_points, matr.data() + col * matr.num_rows_padded(), num_data_points * sizeof(T)); + } + } + } else { + // can memcpy data directly + std::memcpy(ptr, matr.data(), matr.size() * sizeof(T)); + } + + // transfer ownership to Python + return arr.release(); + } + + /** + * @brief Convert the @p arr to a plssvm::matrix and set the type caster's internal value. 
+ * @tparam Flags the Pybind11 Numpy array flags used in @p arr + * @param[in] arr the Numpy array to convert + * @return `true` if the conversion was successful, `false` otherwise + */ + template + bool copy_pyarray_to_matrix(const py::array_t &arr) { + // get dimensions + const std::size_t num_rows = arr.shape(0); + const std::size_t num_cols = arr.shape(1); + + // get the underlying raw memory while the GIL is still held (acquiring and releasing the buffer calls into the Python C-API) + py::buffer_info buffer = arr.request(); + const T *ptr = static_cast(buffer.ptr); + + // note: the conversions use OpenMP -> remove Python's Global Interpreter Lock; declared after buffer so that the GIL is re-acquired before buffer is destroyed + const py::gil_scoped_release release; + + // check the memory layout of the Python Numpy array + if constexpr (static_cast(Flags & py::array::c_style)) { + // the provided Python Numpy array has C style layout + if constexpr (layout == plssvm::layout_type::aos) { + // memory layout of Python Numpy array and PLSSVM matrix are the same -> can use memcpy to convert +#pragma omp parallel for + for (std::size_t row = 0; row < num_rows; ++row) { + std::memcpy(value.data() + row * value.num_cols_padded(), ptr + row * num_cols, num_cols * sizeof(T)); + } + } else if constexpr (layout == plssvm::layout_type::soa) { + // the memory layouts don't match -> must use loops to convert layouts +#pragma omp parallel for collapse(2) + for (std::size_t row = 0; row < num_rows; ++row) { + for (std::size_t col = 0; col < num_cols; ++col) { + value(row, col) = ptr[row * num_cols + col]; + } + } + } else { + // unsupported PLSSVM matrix memory layout + return false; + } + } else if constexpr (static_cast(Flags & py::array::f_style)) { + if constexpr (layout == plssvm::layout_type::aos) { + // the memory layouts don't match -> must use loops to convert layouts +#pragma omp parallel for collapse(2) + for (std::size_t row = 0; row < num_rows; ++row) { + for (std::size_t col = 0; col < num_cols; ++col) { + value(row, col) = ptr[col * num_rows + row]; + } + } + } else if constexpr (layout == plssvm::layout_type::soa) { + // memory
layout of Python Numpy array and PLSSVM matrix are the same -> can use memcpy to convert +#pragma omp parallel for + for (std::size_t row = 0; row < num_cols; ++row) { + std::memcpy(value.data() + row * value.num_rows_padded(), ptr + row * num_rows, num_rows * sizeof(T)); + } + } else { + // unsupported PLSSVM matrix memory layout + return false; + } + } else { + // should not be reached since we fix this case already in the calling function + return false; + } + + return true; + } + + /** + * @brief Try converting a Python object @p obj to a plssvm::matrix. + * @details Honors different Numpy ndarray memory layouts (c_style or f_style) and PLSSVM matrix layout types. + * @param[in] obj the object to convert + * @return `true` if the conversion was successful, `false` otherwise + * @throws py::value_error if the provided Python list is empty (or one-dimensional) + * @throws py::value_error if the provided 2D Python list has inhomogeneous shape + * @throws py::value_error if @p obj is not a Numpy ndarray, Pandas DataFrame, SciPy sparse matrix, or Python 2D list + * @throws py::value_error if the Numpy ndarray doesn't have a two-dimensional shape + */ + bool load(handle obj, bool) { + // special case py::list + if (py::isinstance(obj)) { + // provided obj is a Python list -> check if it is a correct py::list of py::list + // convert to py::list + const auto &list = obj.cast(); + if (list.empty()) { + throw py::value_error{ "Expected 2D array, got 1D array instead!"
}; + } + + // iterate over py::list + const std::size_t num_rows = list.size(); + const std::size_t num_cols = list[0].cast().size(); + + // create the matrix with the expected size + value = matrix_type{ plssvm::shape{ num_rows, num_cols }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } }; + + // fill the matrix + for (std::size_t row = 0; row < num_rows; ++row) { + // get the sublist + auto sublist = list[row].cast(); + // check if the number of values in the sublist is correct + if (num_cols != sublist.size()) { + throw py::value_error{ "setting an array element with a sequence. The requested array has an inhomogeneous shape." }; + } + // add list values to the result matrix + for (std::size_t col = 0; col < num_cols; ++col) { + if (py::isinstance(sublist[col])) { + // cast py::str to a type T via using a std::string + value(row, col) = plssvm::detail::convert_to(sublist[col].cast()); + } else { + // directly cast the value to a type T + value(row, col) = sublist[col].cast(); + } + } + } + } else { + py::array arr{}; + if (py::isinstance(obj)) { + // provided obj is a numpy array + arr = obj.cast(); + } else if (plssvm::bindings::python::util::is_pandas_data_frame(obj)) { + // provided obj is a Pandas DataFrame + arr = obj.attr("values").cast(); + } else if (plssvm::bindings::python::util::is_scipy_sparse_matrix(obj)) { + // provided obj is a SciPy sparse matrix + arr = obj.attr("toarray")().cast(); + } else { + throw py::value_error{ fmt::format("Unsupported data type: {}", std::string{ py::str(obj.get_type().attr("__name__")) }) }; + } + + // sanity check the number of elements in the numpy array + if (arr.ndim() > 2) { + throw py::value_error{ fmt::format("Found array with dim {}. SVC expected <= 2.", arr.ndim()) }; + } + if (arr.ndim() == 1) { + throw py::value_error{ "Expected 2D array, got 1D array instead." 
}; + } + if (arr.size() == 0) { + throw py::value_error{ fmt::format("Found array with 0 sample(s) (shape=({}, {})) while a minimum of 1 is required by SVC.", arr.shape(0), arr.shape(1)) }; + } + + // get dimensions + const std::size_t num_rows = arr.shape(0); + const std::size_t num_cols = arr.shape(1); + + // create PLSSVM matrix with the correct dimensions AND padding entries + value = matrix_type{ plssvm::shape{ num_rows, num_cols }, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } }; + + // get the underlying buffer + py::buffer_info buffer = arr.request(); + + // check the memory layout of the Python Numpy array + if (plssvm::bindings::python::util::is_c_contiguous(buffer)) { + // array is already c_style -> no need to force cast + return copy_pyarray_to_matrix(arr.cast>()); + } else if (plssvm::bindings::python::util::is_f_contiguous(buffer)) { + // array is already f_style -> no need to force cast + return copy_pyarray_to_matrix(arr.cast>()); + } else { + // array is non-contiguous + if constexpr (layout == plssvm::layout_type::aos) { + // if we want to get a PLSSVM matrix in AoS layout, force casting to c_style is more performant + return copy_pyarray_to_matrix(arr.cast>()); + } else if constexpr (layout == plssvm::layout_type::soa) { + // if we want to get a PLSSVM matrix in SoA layout, force casting to f_style is more performant + return copy_pyarray_to_matrix(arr.cast>()); + } else { + return false; + } + } + } + + return true; + } +}; + +} // namespace pybind11::detail + +#endif // PLSSVM_BINDINGS_PYTHON_TYPE_CASTER_MATRIX_TYPE_CASTER_HPP_ diff --git a/bindings/Python/type_caster/matrix_wrapper_type_caster.hpp b/bindings/Python/type_caster/matrix_wrapper_type_caster.hpp new file mode 100644 index 000000000..75445dce2 --- /dev/null +++ b/bindings/Python/type_caster/matrix_wrapper_type_caster.hpp @@ -0,0 +1,128 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights 
Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Implements a custom type caster for a matrix wrapper (storing a plssvm::matrix and optional feature names). + */ + +#ifndef PLSSVM_BINDINGS_PYTHON_TYPE_CASTER_MATRIX_WRAPPER_TYPE_CASTER_HPP_ +#define PLSSVM_BINDINGS_PYTHON_TYPE_CASTER_MATRIX_WRAPPER_TYPE_CASTER_HPP_ +#pragma once + +#include "plssvm/matrix.hpp" // plssvm::matrix, plssvm::layout_type + +#include "bindings/Python/type_caster/matrix_type_caster.hpp" // custom plssvm::matrix type caster +#include "bindings/Python/utility.hpp" // plssvm::bindings::python::util::is_pandas_data_frame + +#include "pybind11/cast.h" // pybind11::detail::type_caster +#include "pybind11/pybind11.h" // py::isinstance, py::value_error +#include "pybind11/pytypes.h" // py::list, py::str + +#include // std::optional, std::make_optional, std::nullopt +#include // std::string +#include // std::move +#include // std::vector + +namespace py = pybind11; + +namespace plssvm::bindings::python::util { + +/** + * @brief A small wrapper around a plssvm::matrix that also allows us to store potential feature names. + * @tparam T the value type of the PLSSVM matrix + * @tparam layout the memory layout of the PLSSVM matrix + */ +template +struct matrix_wrapper { + /// The PLSSVM matrix. + plssvm::matrix matrix{}; + /// The optionally available feature names. + std::optional> feature_names{}; +}; + +/** + * @brief Shortcut for a matrix_wrapper storing a plssvm::matrix in the Array-of-Structs layout. + */ +template +using aos_matrix_wrapper = matrix_wrapper; + +/** + * @brief Shortcut for a matrix_wrapper storing a plssvm::matrix in the Struct-of-Arrays layout. 
+ */ +template +using soa_matrix_wrapper = matrix_wrapper; + +} // namespace plssvm::bindings::python::util + +namespace pybind11::detail { + +/** + * @brief A custom Pybind11 type caster to convert Python object from and to a plssvm::bindings::python::util::matrix_wrapper. + * @tparam T the value type of the PLSSVM matrix + * @tparam layout the memory layout type of the PLSSVM matrix + */ +template +struct type_caster> { + public: + /// The type of the matrix wrapper to convert from/to. + using matrix_type = plssvm::bindings::python::util::matrix_wrapper; + + /// Specify the Python type name to which a matrix_wrapper should be converted. + PYBIND11_TYPE_CASTER(matrix_type, _("numpy.ndarray")); + + /** + * @brief Convert a matrix_wrapper to a Numpy ndarray. Simply calls the custom type caster for a plssvm::matrix. + * @param[in] matr the matrix_wrapper whose PLSSVM matrix is converted to a Numpy ndarray + * @return a Pybind11 handle owning a new reference to the Numpy ndarray (released from the temporary py::object so that it is not decref'ed on return) + */ + static handle cast(const matrix_type &matr, return_value_policy, handle) { + return py::cast(matr.matrix).release(); + } + + /** + * @brief Try converting a Python object @p obj to a matrix_wrapper. + * @details Calls the custom type caster for a plssvm::matrix and, additionally, tries to gather the feature names. 
+ * @param[in] obj the object to convert + * @return `true` if the conversion was successful, `false` otherwise + * @throws py::value_error all exceptions from the custom plssvm::matrix type caster + * @throws py::type_error if not all column names are strings + */ + bool load(handle obj, bool) { + // convert the object to a plssvm::matrix + value.matrix = obj.cast>(); + + if (plssvm::bindings::python::util::is_pandas_data_frame(obj)) { + // check whether column names can be set + if (py::hasattr(obj, "columns")) { + const auto &list = obj.attr("columns").cast(); + std::vector column_names{}; + column_names.reserve(list.size()); + for (py::handle item : list) { + // note: column names are only set if they are ALL strings + if (!py::isinstance(item)) { + throw py::type_error{ + "Feature names are only supported if all input features have string names. If you want feature names to be stored and validated, " + "you must convert them all to strings, by using X.columns = X.columns.astype(str) for example. Otherwise you can remove feature / " + "column names from your input data, or convert them all to a non-string data type." 
+ }; + } + column_names.push_back(item.cast()); + } + // set the column names in the matrix_wrapper + value.feature_names = std::make_optional(std::move(column_names)); + } else { + value.feature_names = std::nullopt; + } + } + + return true; + } +}; + +} // namespace pybind11::detail + +#endif // PLSSVM_BINDINGS_PYTHON_TYPE_CASTER_MATRIX_WRAPPER_TYPE_CASTER_HPP_ diff --git a/bindings/Python/utility.hpp b/bindings/Python/utility.hpp index 770b64c72..66d5bb617 100644 --- a/bindings/Python/utility.hpp +++ b/bindings/Python/utility.hpp @@ -11,167 +11,82 @@ #ifndef PLSSVM_BINDINGS_PYTHON_UTILITY_HPP_ #define PLSSVM_BINDINGS_PYTHON_UTILITY_HPP_ - #pragma once -#include "plssvm/constants.hpp" // plssvm::real_type, plssvm::PADDING_SIZE -#include "plssvm/detail/utility.hpp" // plssvm::detail::contains -#include "plssvm/gamma.hpp" // plssvm::gamma_type -#include "plssvm/matrix.hpp" // plssvm::matrix, plssvm::layout_type -#include "plssvm/parameter.hpp" // plssvm::parameter -#include "plssvm/shape.hpp" // plssvm::shape +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t +#include "plssvm/detail/utility.hpp" // plssvm::detail::contains +#include "plssvm/gamma.hpp" // plssvm::gamma_type +#include "plssvm/parameter.hpp" // plssvm::parameter -#include "fmt/format.h" // fmt::format -#include "pybind11/buffer_info.h" // py::buffer_info -#include "pybind11/numpy.h" // py::array_t -#include "pybind11/pybind11.h" // py::kwargs, py::value_error, py::exception, py::str, py::set_error -#include "pybind11/pytypes.h" // py::list -#include "pybind11/stl.h" // support for STL types +#include "fmt/format.h" // fmt::format +#include "pybind11/numpy.h" // py::array, py::array_t, py::buffer_info, py::array::c_style +#include "pybind11/pybind11.h" // py::kwargs, py::value_error, py::isinstance, py::str, py::module_, py::register_exception_translator, py::set_error, py::object, py::len +#include "pybind11/pytypes.h" // 
py::type, py::ssize_t +#include // fixed-width integers #include // std::memcpy #include // std::exception_ptr, std::rethrow_exception #include // std::istringstream #include // std::string #include // std::string_view -#include // std::is_same_v, std::conditional_t +#include // std::is_same_v, std::false_type +#include // std::forward +#include // std::variant #include // std::vector namespace py = pybind11; +namespace plssvm::bindings::python::util { + +namespace detail { + /** - * @brief Convert a `std::vector` to a Python Numpy array. - * @tparam T the type in the array - * @param[in] vec the vector to convert - * @return the Python Numpy array (`[[nodiscard]]`) + * @brief Base case for a type having a label_type typedef. */ template -[[nodiscard]] auto vector_to_pyarray(const std::vector &vec) { - if constexpr (std::is_same_v) { - py::list l{}; - for (const std::string &str : vec) { - l.append(str); - } - return py::array{ l }; - } else { - py::array_t py_array(vec.size()); - py::buffer_info buffer = py_array.request(); - T *ptr = static_cast(buffer.ptr); - if constexpr (std::is_same_v) { - // can't use memcpy with std::vector - for (typename std::vector::size_type i = 0; i < vec.size(); ++i) { - ptr[i] = vec[i]; - } - } else { - // use plain memcpy - std::memcpy(ptr, vec.data(), vec.size() * sizeof(T)); - } - return py_array; - } -} +struct get_label_type { + using type = typename T::label_type; +}; /** - * @brief Convert a `plssvm::matrix` to a Python Numpy array. - * @tparam T the type in the array - * @param[in] mat the matrix to convert - * @return the Python Numpy array (`[[nodiscard]]`) + * @brief Specialization for a std::vector. 
*/ -template -[[nodiscard]] auto matrix_to_pyarray(const plssvm::matrix &mat) { - using size_type = typename plssvm::matrix::size_type; - const size_type num_data_points = mat.num_rows(); - const size_type num_features = mat.num_cols(); - - using py_array_type = std::conditional_t, py::array_t>; - - py_array_type py_array({ num_data_points, num_features }); - py::buffer_info buffer = py_array.request(); - T *ptr = static_cast(buffer.ptr); - if (mat.is_padded()) { - // must remove padding entries before copying to Python numpy array - const plssvm::matrix mat_without_padding{ mat, plssvm::shape{ 0, 0 } }; - std::memcpy(ptr, mat_without_padding.data(), mat.size() * sizeof(T)); - } else { - // can memcpy data directly - std::memcpy(ptr, mat.data(), mat.size() * sizeof(T)); - } - return py_array; -} +template +struct get_label_type> { + using type = T; +}; /** - * @brief Convert a Python Numpy array to a `std::vector`. - * @tparam T the type in the array - * @param[in] vec the Python Numpy array to convert - * @return the `std::vector` (`[[nodiscard]]`) + * @brief Get the label type from @p T. If @p T is a std::vector, uses the std::vector<>::value_type, otherwise directly uses the member label_type typedef. + * @tparam T the type to get the label type from */ template -[[nodiscard]] std::vector pyarray_to_vector(const py::array_t &vec) { - // check dimensions - if (vec.ndim() != 1) { - throw py::value_error{ fmt::format("the provided array must have exactly one dimension but has {}!", vec.ndim()) }; - } - - // convert py::array to std::vector - return std::vector(vec.data(0), vec.data(0) + vec.shape(0)); -} +using get_label_type_t = typename get_label_type>::type; /** - * @brief Convert a Python Numpy array to a `std::vector`. - * @tparam T the type in the array - * @param[in] vec the Python Numpy array to convert - * @return the `std::vector` (`[[nodiscard]]`) + * @brief Base false case. 
*/ -template -[[nodiscard]] std::vector pyarray_to_string_vector(const py::array_t &vec) { - // check dimensions - if (vec.ndim() != 1) { - throw py::value_error{ fmt::format("the provided array must have exactly one dimension but has {}!", vec.ndim()) }; - } - - // convert labels to strings - std::vector tmp(vec.shape(0)); - for (std::vector::size_type i = 0; i < tmp.size(); ++i) { - tmp[i] = fmt::format("{}", *vec.data(i)); - } - - return tmp; -} +template +struct is_label_type_in_variant : std::false_type { }; /** - * @brief Convert a Python List to a `std::vector`. - * @param[in] list the Python List to convert - * @return the `std::vector` (`[[nodiscard]]`) + * @brief Specialization for a std::variant. Checks for the types using logical or via fold expression. */ -[[nodiscard]] inline std::vector pylist_to_string_vector(const py::list &list) { - // convert a Python list containing strings to a std::vector - std::vector tmp(py::len(list)); - for (std::vector::size_type i = 0; i < tmp.size(); ++i) { - tmp[i] = list[i].cast().cast(); - } - - return tmp; -} +template +struct is_label_type_in_variant> { + constexpr static bool value = ((std::is_same_v>) || ...); +}; /** - * @brief Convert a Python Numpy array to a `plssvm::aos_matrix`. - * @tparam T the type in the array - * @param[in] mat the 2D Python Numpy matrix to convert - * @return the `plssvm::aos_matrix` (`[[nodiscard]]`) + * @brief Check whether @p T is a label type in @p Variant. 
+ * @tparam T the type to check + * @tparam Variant the variant type that should contain the label type @p T */ -template -[[nodiscard]] plssvm::aos_matrix pyarray_to_matrix(const py::array_t &mat) { - // TODO: if C++20 is available, use templated lambdas to also support f_style arrays (template) - using size_type = typename plssvm::aos_matrix::size_type; - // check dimensions - if (mat.ndim() != 2) { - throw py::value_error{ fmt::format("the provided matrix must have exactly two dimensions but has {}!", mat.ndim()) }; - } +template +constexpr bool is_label_type_in_variant_v = is_label_type_in_variant::value; - // convert py::array to plssvm::matrix - py::buffer_info buffer = mat.request(); - T *ptr = static_cast(buffer.ptr); - plssvm::aos_matrix tmp{ plssvm::shape{ static_cast(mat.shape(0)), static_cast(mat.shape(1)) }, ptr, plssvm::shape{ plssvm::PADDING_SIZE, plssvm::PADDING_SIZE } }; - return tmp; -} +} // namespace detail /** * @brief Check that the Python kwargs @p args only contain keyword arguments with names present in @p valid_named_args. @@ -182,7 +97,7 @@ template inline void check_kwargs_for_correctness(const py::kwargs &args, const std::vector &valid_named_args) { for (const auto &[key, value] : args) { if (!plssvm::detail::contains(valid_named_args, key.cast())) { - throw py::value_error(fmt::format("got an unexpected keyword argument '{}'", key.cast())); + throw py::value_error{ fmt::format("got an unexpected keyword argument '{}'", key.cast()) }; } } } @@ -207,7 +122,7 @@ inline void check_kwargs_for_correctness(const py::kwargs &args, const std::vect } else { const auto gamma = args["gamma"].cast(); if (gamma <= plssvm::real_type{ 0.0 }) { - throw py::value_error{ fmt::format("gamma value must be > 0; {} is invalid. Use a positive number or use 'auto' to set gamma to a value of 1 / n_features.", gamma) }; + throw py::value_error{ fmt::format("gamma value must be > 0; {} is invalid. 
Use a positive number or use 'scale' or 'auto'.", gamma) }; } return gamma; } @@ -260,15 +175,13 @@ void register_py_exception(py::module_ &m, const std::string &py_exception_name, }); } -namespace detail { - /** - * @def PLSSVM_CREATE_NUMPY_NAME_MAPPING + * @def PLSSVM_CREATE_PYTHON_TYPE_NAME_MAPPING * @brief Map the @p type to its Numpy type name pendant @p numpy_name. */ -#define PLSSVM_CREATE_NUMPY_NAME_MAPPING(type, numpy_name) \ - template <> \ - [[nodiscard]] constexpr inline std::string_view numpy_name_mapping() { return numpy_name; } +#define PLSSVM_CREATE_PYTHON_TYPE_NAME_MAPPING(type, numpy_name) \ + template <> \ + [[nodiscard]] constexpr std::string_view python_type_name_mapping() { return numpy_name; } /** * @brief Tries to convert the given type to its Numpy name. @@ -277,38 +190,240 @@ namespace detail { * @return the name of `T` (`[[nodiscard]]`) */ template -[[nodiscard]] constexpr inline std::string_view numpy_name_mapping() = delete; - -PLSSVM_CREATE_NUMPY_NAME_MAPPING(bool, "bool") -PLSSVM_CREATE_NUMPY_NAME_MAPPING(char, "char") -PLSSVM_CREATE_NUMPY_NAME_MAPPING(signed char, "byte") -PLSSVM_CREATE_NUMPY_NAME_MAPPING(unsigned char, "ubyte") -PLSSVM_CREATE_NUMPY_NAME_MAPPING(short, "short") -PLSSVM_CREATE_NUMPY_NAME_MAPPING(unsigned short, "ushort") -PLSSVM_CREATE_NUMPY_NAME_MAPPING(int, "intc") -PLSSVM_CREATE_NUMPY_NAME_MAPPING(unsigned int, "uintc") -PLSSVM_CREATE_NUMPY_NAME_MAPPING(long, "int") -PLSSVM_CREATE_NUMPY_NAME_MAPPING(unsigned long, "uint") -PLSSVM_CREATE_NUMPY_NAME_MAPPING(long long, "longlong") -PLSSVM_CREATE_NUMPY_NAME_MAPPING(unsigned long long, "ulonglong") -PLSSVM_CREATE_NUMPY_NAME_MAPPING(float, "float") -PLSSVM_CREATE_NUMPY_NAME_MAPPING(double, "double") -PLSSVM_CREATE_NUMPY_NAME_MAPPING(long double, "longdouble") -PLSSVM_CREATE_NUMPY_NAME_MAPPING(std::string, "string") - -#undef PLSSVM_CREATE_NUMPY_NAME_MAPPING +[[nodiscard]] constexpr std::string_view python_type_name_mapping() = delete; -} // namespace detail +// map all 
our supported types +PLSSVM_CREATE_PYTHON_TYPE_NAME_MAPPING(bool, "bool") +PLSSVM_CREATE_PYTHON_TYPE_NAME_MAPPING(char, "np.byte") +PLSSVM_CREATE_PYTHON_TYPE_NAME_MAPPING(signed char, "np.int8") +PLSSVM_CREATE_PYTHON_TYPE_NAME_MAPPING(unsigned char, "np.uint8") +PLSSVM_CREATE_PYTHON_TYPE_NAME_MAPPING(short, "np.int16") +PLSSVM_CREATE_PYTHON_TYPE_NAME_MAPPING(unsigned short, "np.uint16") +PLSSVM_CREATE_PYTHON_TYPE_NAME_MAPPING(int, "np.int32") +PLSSVM_CREATE_PYTHON_TYPE_NAME_MAPPING(unsigned int, "np.uint32") +PLSSVM_CREATE_PYTHON_TYPE_NAME_MAPPING(long, "np.int64") +PLSSVM_CREATE_PYTHON_TYPE_NAME_MAPPING(unsigned long, "np.uint64") +PLSSVM_CREATE_PYTHON_TYPE_NAME_MAPPING(long long, "np.int64") +PLSSVM_CREATE_PYTHON_TYPE_NAME_MAPPING(unsigned long long, "np.uint64") +PLSSVM_CREATE_PYTHON_TYPE_NAME_MAPPING(float, "np.float32") +PLSSVM_CREATE_PYTHON_TYPE_NAME_MAPPING(double, "np.float64") +PLSSVM_CREATE_PYTHON_TYPE_NAME_MAPPING(long double, "np.longdouble") +PLSSVM_CREATE_PYTHON_TYPE_NAME_MAPPING(std::string, "str") + +#undef PLSSVM_CREATE_PYTHON_TYPE_NAME_MAPPING /** - * @brief Append the type information to the base @p class_name. - * @tparam label_type the type of the labels to convert to its Numpy name - * @param class_name the base class name (the type names are appended to it) - * @return the unique class name + * @brief Depending on the Python @p type, construct a new @p Instance of the @p PossibleTypes using the provided parameters @p args. + * @details Checks all supported Python types. If a theoretically supported type is not present in @p PossibleTypes, the type is skipped with a constexpr if. + * @tparam Instance the type of the object to create + * @tparam PossibleTypes all possible types that could be created using a std::variant + * @tparam Args the type of the constructor parameters forwarded to the constructor of @p Instance. 
+ * @param[in] type the dynamic Python type used to determine the used label type + * @param[in] args the parameters forwarded to the @p Instance constructor + * @return the constructed @p Instance wrapped in a std::variant of type @p PossibleTypes (`[[nodiscard]]`) */ -template -[[nodiscard]] inline std::string assemble_unique_class_name(const std::string_view class_name) { - return fmt::format("{}_{}", class_name, detail::numpy_name_mapping()); +template