From 0e2c3e5db41b6b2af4038734c84ab855ccaaa5f0 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 11 Oct 2023 21:05:31 -0700 Subject: [PATCH] Add pybind11/gil_safe_call_once.h (to fix deadlocks in pybind11/numpy.h) (#4877) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * LazyInitializeAtLeastOnceDestroyNever v1 * Go back to using `union` as originally suggested by jbms@. The trick (also suggested by jbms@) is to add empty ctor + dtor. * Revert "Go back to using `union` as originally suggested by jbms@. The trick (also suggested by jbms@) is to add empty ctor + dtor." This reverts commit e7b8c4f0fcd72191e88d1c17abf5da08fe3a9c6f. * Remove `#include ` * `include\pybind11/numpy.h(24,10): fatal error C1083: Cannot open include file: 'stdalign.h': No such file or directory` * @tkoeppe wrote: this is a C interop header (and we're not writing C) * Suppress gcc 4.8.5 (CentOS 7) warning. ``` include/pybind11/eigen/../numpy.h:63:53: error: dereferencing type-punned pointer will break strict-aliasing rules [-Werror=strict-aliasing] return *reinterpret_cast(value_storage_); ^ ``` * Replace comments: Document PRECONDITION. Adopt comment suggested by @tkoeppe: https://github.com/pybind/pybind11/pull/4877#discussion_r1350356093 * Adopt suggestion by @tkoeppe: * https://github.com/pybind/pybind11/pull/4877#issuecomment-1752969127 * https://godbolt.org/z/Wa79nKz6e * Add `PYBIND11_CONSTINIT`, but it does not work for the current use cases: ``` g++ -o pybind11/tests/test_numpy_array.os -c -std=c++20 -fPIC -fvisibility=hidden -O0 -g -Wall -Wextra -Wconversion -Wcast-qual -Wdeprecated -Wundef -Wnon-virtual-dtor -Wunused-result -Werror -isystem /usr/include/python3.11 -isystem /usr/include/eigen3 -DPYBIND11_STRICT_ASSERTS_CLASS_HOLDER_VS_TYPE_CASTER_MIX -DPYBIND11_ENABLE_TYPE_CASTER_ODR_GUARD_IF_AVAILABLE -DPYBIND11_TEST_BOOST -Ipybind11/include -I/usr/local/google/home/rwgk/forked/pybind11/include -I/usr/local/google/home/rwgk/clone/pybind11/include /usr/local/google/home/rwgk/forked/pybind11/tests/test_numpy_array.cpp ``` ``` In file included from /usr/local/google/home/rwgk/forked/pybind11/tests/test_numpy_array.cpp:10: /usr/local/google/home/rwgk/forked/pybind11/include/pybind11/numpy.h: In static member function ‘static pybind11::detail::npy_api& pybind11::detail::npy_api::get()’: /usr/local/google/home/rwgk/forked/pybind11/include/pybind11/numpy.h:258:82: error: ‘constinit’ variable ‘api_init’ does not have a constant initializer 258 | PYBIND11_CONSTINIT static LazyInitializeAtLeastOnceDestroyNever api_init; | ^~~~~~~~ ``` ``` In file included from /usr/local/google/home/rwgk/forked/pybind11/tests/test_numpy_array.cpp:10: /usr/local/google/home/rwgk/forked/pybind11/include/pybind11/numpy.h: In static member function ‘static pybind11::object& pybind11::dtype::_dtype_from_pep3118()’: /usr/local/google/home/rwgk/forked/pybind11/include/pybind11/numpy.h:697:13: error: ‘constinit’ variable ‘imported_obj’ does not have a constant initializer 697 | imported_obj; | ^~~~~~~~~~~~ ``` * Revert "Add `PYBIND11_CONSTINIT`, but it does not work for the current use cases:" This reverts commit f07b28bda9f91fb723aa898a21c81b6dd6857072. * Reapply "Add `PYBIND11_CONSTINIT`, but it does not work for the current use cases:" This reverts commit 36be645758aa82b576d24003808386bec6e55bf9. * Add Default Member Initializer on `value_storage_` as suggested by @tkoeppe: https://github.com/pybind/pybind11/pull/4877#issuecomment-1753201342 This fixes the errors reported under commit f07b28bda9f91fb723aa898a21c81b6dd6857072. * Fix copy-paste-missed-a-change mishap in commit 88cec1152ab5576db19bab95c484672f06f5989a. * Semi-paranoid placement new (based on https://github.com/pybind/pybind11/pull/4877#discussion_r1350573114). * Move PYBIND11_CONSTINIT to detail/common.h * Move code to the right places, rename new class and some variables. * Fix oversight: update tests/extra_python_package/test_files.py * Get the name right first. * Use `std::call_once`, `std::atomic`, following a pattern developed by @tkoeppe * Make the API more self-documenting (and possibly more easily reusable). * google-clang-tidy IWYU fixes * Rewrite comment as suggested by @tkoeppe * Update test_exceptions.cpp and exceptions.rst * Fix oversight in previous commit: add `PYBIND11_CONSTINIT` * Make `get_stored()` non-const for simplicity. As suggested by @tkoeppe: not seeing any reasonable use in which `get_stored` has to be const. * Add comment regarding `KeyboardInterrupt` behavior, based heavily on information provided by @jbms. * Add `assert(PyGILState_Check())` in `gil_scoped_release` ctor (simple & non-simple implementation) as suggested by @EthanSteinberg. * Fix oversight in previous commit (missing include cassert). * Remove use of std::atomic, leaving comments with rationale, why it is not needed. * Rewrite comment re `std:optional` based on deeper reflection (aka 2nd thoughts). * Additional comment with the conclusion of a discussion under PR #4877. * https://github.com/pybind/pybind11/pull/4877#issuecomment-1757363179 * Small comment changes suggested by @tkoeppe. --- CMakeLists.txt | 1 + docs/advanced/exceptions.rst | 9 ++- include/pybind11/detail/common.h | 8 +++ include/pybind11/gil.h | 10 ++- include/pybind11/gil_safe_call_once.h | 91 ++++++++++++++++++++++++ include/pybind11/numpy.h | 19 +++-- tests/extra_python_package/test_files.py | 1 + tests/test_exceptions.cpp | 8 ++- 8 files changed, 133 insertions(+), 14 deletions(-) create mode 100644 include/pybind11/gil_safe_call_once.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 65ad0c22fd..d2c44dc40d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -142,6 +142,7 @@ set(PYBIND11_HEADERS include/pybind11/embed.h include/pybind11/eval.h include/pybind11/gil.h + include/pybind11/gil_safe_call_once.h include/pybind11/iostream.h include/pybind11/functional.h include/pybind11/numpy.h diff --git a/docs/advanced/exceptions.rst b/docs/advanced/exceptions.rst index 616e2d7726..e20f42b5fe 100644 --- a/docs/advanced/exceptions.rst +++ b/docs/advanced/exceptions.rst @@ -141,15 +141,14 @@ standard python RuntimeError: .. code-block:: cpp - // This is a static object, so we must leak the Python reference: - // It is undefined when the destructor will run, possibly only after the - // Python interpreter is finalized already. - static py::handle exc = py::exception(m, "MyCustomError").release(); + PYBIND11_CONSTINIT static py::gil_safe_call_once_and_store exc_storage; + exc_storage.call_once_and_store_result( + [&]() { return py::exception(m, "MyCustomError"); }); py::register_exception_translator([](std::exception_ptr p) { try { if (p) std::rethrow_exception(p); } catch (const MyCustomException &e) { - py::set_error(exc, e.what()); + py::set_error(exc_storage.get_stored(), e.what()); } catch (const OtherException &e) { py::set_error(PyExc_RuntimeError, e.what()); } diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h index e79f7693d0..83800e960b 100644 --- a/include/pybind11/detail/common.h +++ b/include/pybind11/detail/common.h @@ -118,6 +118,14 @@ # endif #endif +#if defined(PYBIND11_CPP20) +# define PYBIND11_CONSTINIT constinit +# define PYBIND11_DTOR_CONSTEXPR constexpr +#else +# define PYBIND11_CONSTINIT +# define PYBIND11_DTOR_CONSTEXPR +#endif + // Compiler version assertions #if defined(__INTEL_COMPILER) # if __INTEL_COMPILER < 1800 diff --git a/include/pybind11/gil.h b/include/pybind11/gil.h index 570a5581d5..da22f48d7e 100644 --- a/include/pybind11/gil.h +++ b/include/pybind11/gil.h @@ -11,6 +11,8 @@ #include "detail/common.h" +#include + #if defined(WITH_THREAD) && !defined(PYBIND11_SIMPLE_GIL_MANAGEMENT) # include "detail/internals.h" #endif @@ -137,7 +139,9 @@ class gil_scoped_acquire { class gil_scoped_release { public: + // PRECONDITION: The GIL must be held when this constructor is called. explicit gil_scoped_release(bool disassoc = false) : disassoc(disassoc) { + assert(PyGILState_Check()); // `get_internals()` must be called here unconditionally in order to initialize // `internals.tstate` for subsequent `gil_scoped_acquire` calls. Otherwise, an // initialization race could occur as multiple threads try `gil_scoped_acquire`. @@ -201,7 +205,11 @@ class gil_scoped_release { PyThreadState *state; public: - gil_scoped_release() : state{PyEval_SaveThread()} {} + // PRECONDITION: The GIL must be held when this constructor is called. + gil_scoped_release() { + assert(PyGILState_Check()); + state = PyEval_SaveThread(); + } gil_scoped_release(const gil_scoped_release &) = delete; gil_scoped_release &operator=(const gil_scoped_release &) = delete; ~gil_scoped_release() { PyEval_RestoreThread(state); } diff --git a/include/pybind11/gil_safe_call_once.h b/include/pybind11/gil_safe_call_once.h new file mode 100644 index 0000000000..eaf84d16e8 --- /dev/null +++ b/include/pybind11/gil_safe_call_once.h @@ -0,0 +1,91 @@ +// Copyright (c) 2023 The pybind Community. + +#pragma once + +#include "detail/common.h" +#include "gil.h" + +#include +#include + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +// Use the `gil_safe_call_once_and_store` class below instead of the naive +// +// static auto imported_obj = py::module_::import("module_name"); // BAD, DO NOT USE! +// +// which has two serious issues: +// +// 1. Py_DECREF() calls potentially after the Python interpreter was finalized already, and +// 2. deadlocks in multi-threaded processes (because of missing lock ordering). +// +// The following alternative avoids both problems: +// +// PYBIND11_CONSTINIT static py::gil_safe_call_once_and_store storage; +// auto &imported_obj = storage // Do NOT make this `static`! +// .call_once_and_store_result([]() { +// return py::module_::import("module_name"); +// }) +// .get_stored(); +// +// The parameter of `call_once_and_store_result()` must be callable. It can make +// CPython API calls, and in particular, it can temporarily release the GIL. +// +// `T` can be any C++ type, it does not have to involve CPython API types. +// +// The behavior with regard to signals, e.g. `SIGINT` (`KeyboardInterrupt`), +// is not ideal. If the main thread is the one to actually run the `Callable`, +// then a `KeyboardInterrupt` will interrupt it if it is running normal Python +// code. The situation is different if a non-main thread runs the +// `Callable`, and then the main thread starts waiting for it to complete: +// a `KeyboardInterrupt` will not interrupt the non-main thread, but it will +// get processed only when it is the main thread's turn again and it is running +// normal Python code. However, this will be unnoticeable for quick call-once +// functions, which is usually the case. +template +class gil_safe_call_once_and_store { +public: + // PRECONDITION: The GIL must be held when `call_once_and_store_result()` is called. + template + gil_safe_call_once_and_store &call_once_and_store_result(Callable &&fn) { + if (!is_initialized_) { // This read is guarded by the GIL. + // Multiple threads may enter here, because the GIL is released in the next line and + // CPython API calls in the `fn()` call below may release and reacquire the GIL. + gil_scoped_release gil_rel; // Needed to establish lock ordering. + std::call_once(once_flag_, [&] { + // Only one thread will ever enter here. + gil_scoped_acquire gil_acq; + ::new (storage_) T(fn()); // fn may release, but will reacquire, the GIL. + is_initialized_ = true; // This write is guarded by the GIL. + }); + // All threads will observe `is_initialized_` as true here. + } + // Intentionally not returning `T &` to ensure the calling code is self-documenting. + return *this; + } + + // This must only be called after `call_once_and_store_result()` was called. + T &get_stored() { + assert(is_initialized_); + PYBIND11_WARNING_PUSH +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 5 + // Needed for gcc 4.8.5 + PYBIND11_WARNING_DISABLE_GCC("-Wstrict-aliasing") +#endif + return *reinterpret_cast(storage_); + PYBIND11_WARNING_POP + } + + constexpr gil_safe_call_once_and_store() = default; + PYBIND11_DTOR_CONSTEXPR ~gil_safe_call_once_and_store() = default; + +private: + alignas(T) char storage_[sizeof(T)] = {}; + std::once_flag once_flag_ = {}; + bool is_initialized_ = false; + // The `is_initialized_`-`storage_` pair is very similar to `std::optional`, + // but the latter does not have the triviality properties of former, + // therefore `std::optional` is not a viable alternative here. +}; + +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/include/pybind11/numpy.h b/include/pybind11/numpy.h index 23c38660e9..8551aa2648 100644 --- a/include/pybind11/numpy.h +++ b/include/pybind11/numpy.h @@ -10,7 +10,10 @@ #pragma once #include "pybind11.h" +#include "detail/common.h" #include "complex.h" +#include "gil_safe_call_once.h" +#include "pytypes.h" #include #include @@ -206,8 +209,8 @@ struct npy_api { }; static npy_api &get() { - static npy_api api = lookup(); - return api; + PYBIND11_CONSTINIT static gil_safe_call_once_and_store storage; + return storage.call_once_and_store_result(lookup).get_stored(); } bool PyArray_Check_(PyObject *obj) const { @@ -643,10 +646,14 @@ class dtype : public object { char flags() const { return detail::array_descriptor_proxy(m_ptr)->flags; } private: - static object _dtype_from_pep3118() { - module_ m = detail::import_numpy_core_submodule("_internal"); - static PyObject *obj = m.attr("_dtype_from_pep3118").cast().release().ptr(); - return reinterpret_borrow(obj); + static object &_dtype_from_pep3118() { + PYBIND11_CONSTINIT static gil_safe_call_once_and_store storage; + return storage + .call_once_and_store_result([]() { + return detail::import_numpy_core_submodule("_internal") + .attr("_dtype_from_pep3118"); + }) + .get_stored(); } dtype strip_padding(ssize_t itemsize) { diff --git a/tests/extra_python_package/test_files.py b/tests/extra_python_package/test_files.py index e3d881c0b3..344e70d5db 100644 --- a/tests/extra_python_package/test_files.py +++ b/tests/extra_python_package/test_files.py @@ -35,6 +35,7 @@ "include/pybind11/eval.h", "include/pybind11/functional.h", "include/pybind11/gil.h", + "include/pybind11/gil_safe_call_once.h", "include/pybind11/iostream.h", "include/pybind11/numpy.h", "include/pybind11/operators.h", diff --git a/tests/test_exceptions.cpp b/tests/test_exceptions.cpp index f6d9c215c9..d3af7ab728 100644 --- a/tests/test_exceptions.cpp +++ b/tests/test_exceptions.cpp @@ -6,6 +6,8 @@ All rights reserved. Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. */ +#include + #include "test_exceptions.h" #include "local_bindings.h" @@ -114,7 +116,9 @@ TEST_SUBMODULE(exceptions, m) { []() { throw std::runtime_error("This exception was intentionally thrown."); }); // PLEASE KEEP IN SYNC with docs/advanced/exceptions.rst - static py::handle ex = py::exception(m, "MyException").release(); + PYBIND11_CONSTINIT static py::gil_safe_call_once_and_store ex_storage; + ex_storage.call_once_and_store_result( + [&]() { return py::exception(m, "MyException"); }); py::register_exception_translator([](std::exception_ptr p) { try { if (p) { @@ -122,7 +126,7 @@ TEST_SUBMODULE(exceptions, m) { } } catch (const MyException &e) { // Set MyException as the active python error - py::set_error(ex, e.what()); + py::set_error(ex_storage.get_stored(), e.what()); } });