From 377feb8cf469614868a17213063ac85b77bc5696 Mon Sep 17 00:00:00 2001 From: Matthew Broadway Date: Fri, 13 Dec 2024 18:03:23 +0000 Subject: [PATCH] Improve native object initialization Before this change, classes inheriting the base object was a special case where `object.__new__` is not called, and inheriting from other base classes requires use of the unlimited API. Previously this was not very limiting, but with it will be possible to inherit from native base classes with the limited API and possibly from dynamically imported native base classes which may require `__new__` arguments to reach them. --- newsfragments/4798.fixed.md | 2 + pyo3-macros-backend/src/method.rs | 4 +- src/impl_/pyclass.rs | 3 + src/impl_/pyclass_init.rs | 89 ++++++++--------- src/impl_/pymethods.rs | 25 +++-- src/internal/get_slot.rs | 2 +- src/pyclass_init.rs | 152 +++++++++++++++++++++++++++--- src/sealed.rs | 5 +- src/types/any.rs | 2 + 9 files changed, 220 insertions(+), 64 deletions(-) create mode 100644 newsfragments/4798.fixed.md diff --git a/newsfragments/4798.fixed.md b/newsfragments/4798.fixed.md new file mode 100644 index 00000000000..e53757d0a01 --- /dev/null +++ b/newsfragments/4798.fixed.md @@ -0,0 +1,2 @@ +fixes several limitations with base native type initialization. Required for extending native base types +with the limited API and extending base native types that require arguments passed to `__new__`. \ No newline at end of file diff --git a/pyo3-macros-backend/src/method.rs b/pyo3-macros-backend/src/method.rs index a1d7a95df35..ed17f7de44a 100644 --- a/pyo3-macros-backend/src/method.rs +++ b/pyo3-macros-backend/src/method.rs @@ -830,12 +830,14 @@ impl<'a> FnSpec<'a> { _kwargs: *mut #pyo3_path::ffi::PyObject ) -> #pyo3_path::PyResult<*mut #pyo3_path::ffi::PyObject> { use #pyo3_path::impl_::callback::IntoPyCallbackOutput; + let raw_args = _args; + let raw_kwargs = _kwargs; let function = #rust_name; // Shadow the function name to avoid #3017 #arg_convert #init_holders let result = #call; let initializer: #pyo3_path::PyClassInitializer::<#cls> = result.convert(py)?; - #pyo3_path::impl_::pymethods::tp_new_impl(py, initializer, _slf) + #pyo3_path::impl_::pymethods::tp_new_impl(py, initializer, _slf, raw_args, raw_kwargs) } } } diff --git a/src/impl_/pyclass.rs b/src/impl_/pyclass.rs index ac5c6e3e3f0..fa4f1e3b5c1 100644 --- a/src/impl_/pyclass.rs +++ b/src/impl_/pyclass.rs @@ -1138,6 +1138,9 @@ pub trait PyClassBaseType: Sized { type BaseNativeType; type Initializer: PyObjectInit; type PyClassMutability: PyClassMutability; + /// Whether `__new__` ([ffi::PyTypeObject::tp_new]) for this type accepts arguments other + /// than the type of object to create. + const NEW_ACCEPTS_ARGUMENTS: bool = true; } /// Implementation of tp_dealloc for pyclasses without gc diff --git a/src/impl_/pyclass_init.rs b/src/impl_/pyclass_init.rs index 7242b6186d9..bfd85476eb7 100644 --- a/src/impl_/pyclass_init.rs +++ b/src/impl_/pyclass_init.rs @@ -1,11 +1,13 @@ //! Contains initialization utilities for `#[pyclass]`. use crate::ffi_ptr_ext::FfiPtrExt; -use crate::internal::get_slot::TP_ALLOC; -use crate::types::PyType; -use crate::{ffi, Borrowed, PyErr, PyResult, Python}; +use crate::internal::get_slot::TP_NEW; +use crate::types::{PyDict, PyTuple, PyType}; +use crate::{ffi, Borrowed, Bound, PyErr, PyResult, Python}; use crate::{ffi::PyTypeObject, sealed::Sealed, type_object::PyTypeInfo}; use std::marker::PhantomData; +use super::pyclass::PyClassBaseType; + /// Initializer for Python types. /// /// This trait is intended to use internally for distinguishing `#[pyclass]` and @@ -17,69 +19,70 @@ pub trait PyObjectInit: Sized + Sealed { self, py: Python<'_>, subtype: *mut PyTypeObject, + args: &Bound<'_, PyTuple>, + kwargs: Option<&Bound<'_, PyDict>>, ) -> PyResult<*mut ffi::PyObject>; #[doc(hidden)] fn can_be_subclassed(&self) -> bool; } -/// Initializer for Python native types, like `PyDict`. -pub struct PyNativeTypeInitializer(pub PhantomData); +/// Initializer for Python native types, like [PyDict]. +pub struct PyNativeTypeInitializer(pub PhantomData); -impl PyObjectInit for PyNativeTypeInitializer { +impl PyObjectInit for PyNativeTypeInitializer { + /// call `__new__` ([ffi::PyTypeObject::tp_new]) for the native base type. + /// This will allocate a new python object and initialize the part relating to the native base type. unsafe fn into_new_object( self, py: Python<'_>, subtype: *mut PyTypeObject, + args: &Bound<'_, PyTuple>, + kwargs: Option<&Bound<'_, PyDict>>, ) -> PyResult<*mut ffi::PyObject> { unsafe fn inner( py: Python<'_>, - type_object: *mut PyTypeObject, + native_base_type: *mut PyTypeObject, subtype: *mut PyTypeObject, + args: &Bound<'_, PyTuple>, + kwargs: Option<&Bound<'_, PyDict>>, + new_accepts_arguments: bool, ) -> PyResult<*mut ffi::PyObject> { - // HACK (due to FIXME below): PyBaseObject_Type's tp_new isn't happy with NULL arguments - let is_base_object = type_object == std::ptr::addr_of_mut!(ffi::PyBaseObject_Type); - let subtype_borrowed: Borrowed<'_, '_, PyType> = subtype + let native_base_type_borrowed: Borrowed<'_, '_, PyType> = native_base_type .cast::() .assume_borrowed_unchecked(py) .downcast_unchecked(); + let tp_new = native_base_type_borrowed + .get_slot(TP_NEW) + .unwrap_or(ffi::PyType_GenericNew); - if is_base_object { - let alloc = subtype_borrowed - .get_slot(TP_ALLOC) - .unwrap_or(ffi::PyType_GenericAlloc); - - let obj = alloc(subtype, 0); - return if obj.is_null() { - Err(PyErr::fetch(py)) - } else { - Ok(obj) - }; - } - - #[cfg(Py_LIMITED_API)] - unreachable!("subclassing native types is not possible with the `abi3` feature"); + let obj = if new_accepts_arguments { + tp_new( + subtype, + args.as_ptr(), + kwargs + .map(|obj| obj.as_ptr()) + .unwrap_or(std::ptr::null_mut()), + ) + } else { + let args = PyTuple::empty(py); + tp_new(subtype, args.as_ptr(), std::ptr::null_mut()) + }; - #[cfg(not(Py_LIMITED_API))] - { - match (*type_object).tp_new { - // FIXME: Call __new__ with actual arguments - Some(newfunc) => { - let obj = newfunc(subtype, std::ptr::null_mut(), std::ptr::null_mut()); - if obj.is_null() { - Err(PyErr::fetch(py)) - } else { - Ok(obj) - } - } - None => Err(crate::exceptions::PyTypeError::new_err( - "base type without tp_new", - )), - } + if obj.is_null() { + Err(PyErr::fetch(py)) + } else { + Ok(obj) } } - let type_object = T::type_object_raw(py); - inner(py, type_object, subtype) + inner( + py, + T::type_object_raw(py), + subtype, + args, + kwargs, + T::NEW_ACCEPTS_ARGUMENTS, + ) } #[inline] diff --git a/src/impl_/pymethods.rs b/src/impl_/pymethods.rs index 58d0c93c240..705c41e413d 100644 --- a/src/impl_/pymethods.rs +++ b/src/impl_/pymethods.rs @@ -8,10 +8,10 @@ use crate::pycell::impl_::PyClassBorrowChecker as _; use crate::pycell::{PyBorrowError, PyBorrowMutError}; use crate::pyclass::boolean_struct::False; use crate::types::any::PyAnyMethods; -use crate::types::PyType; +use crate::types::{PyDict, PyTuple, PyType}; use crate::{ - ffi, Bound, DowncastError, Py, PyAny, PyClass, PyClassInitializer, PyErr, PyObject, PyRef, - PyRefMut, PyResult, PyTraverseError, PyTypeCheck, PyVisit, Python, + ffi, Borrowed, Bound, DowncastError, Py, PyAny, PyClass, PyClassInitializer, PyErr, PyObject, + PyRef, PyRefMut, PyResult, PyTraverseError, PyTypeCheck, PyVisit, Python, }; use std::ffi::CStr; use std::fmt; @@ -696,13 +696,24 @@ impl<'py, T> std::ops::Deref for BoundRef<'_, 'py, T> { } } -pub unsafe fn tp_new_impl( - py: Python<'_>, +pub unsafe fn tp_new_impl<'py, T: PyClass>( + py: Python<'py>, initializer: PyClassInitializer, - target_type: *mut ffi::PyTypeObject, + most_derived_type: *mut ffi::PyTypeObject, + args: *mut ffi::PyObject, + kwargs: *mut ffi::PyObject, ) -> PyResult<*mut ffi::PyObject> { + // Safety: + // - `args` is known to be a tuple + // - `kwargs` is known to be a dict or null + // - we both have the GIL and can borrow these input references for the `'py` lifetime. + let args: Borrowed<'py, 'py, PyTuple> = + Borrowed::from_ptr(py, args).downcast_unchecked::(); + let kwargs: Option> = + Borrowed::from_ptr_or_opt(py, kwargs).map(|kwargs| kwargs.downcast_unchecked()); + initializer - .create_class_object_of_type(py, target_type) + .create_class_object_of_type(py, most_derived_type, &args, kwargs.as_deref()) .map(Bound::into_ptr) } diff --git a/src/internal/get_slot.rs b/src/internal/get_slot.rs index 260893d4204..6a24608d6e9 100644 --- a/src/internal/get_slot.rs +++ b/src/internal/get_slot.rs @@ -122,11 +122,11 @@ macro_rules! impl_slots { // Slots are implemented on-demand as needed.) impl_slots! { - TP_ALLOC: (Py_tp_alloc, tp_alloc) -> Option, TP_BASE: (Py_tp_base, tp_base) -> *mut ffi::PyTypeObject, TP_CLEAR: (Py_tp_clear, tp_clear) -> Option, TP_DESCR_GET: (Py_tp_descr_get, tp_descr_get) -> Option, TP_FREE: (Py_tp_free, tp_free) -> Option, + TP_NEW: (Py_tp_new, tp_new) -> Option, TP_TRAVERSE: (Py_tp_traverse, tp_traverse) -> Option, } diff --git a/src/pyclass_init.rs b/src/pyclass_init.rs index 6dc6ec12c6b..c913ed4d234 100644 --- a/src/pyclass_init.rs +++ b/src/pyclass_init.rs @@ -1,9 +1,60 @@ //! Contains initialization utilities for `#[pyclass]`. +//! +//! # Background +//! +//! Initialization of a regular empty python class `class MyClass: pass` +//! - `MyClass(*args, **kwargs) == MyClass.__call__(*args, **kwargs) == type.__call__(*args, **kwargs)` +//! - `MyClass.tp_call` is NULL but `obj.__call__` uses `Py_TYPE(obj)->tp_call` ([ffi::_PyObject_MakeTpCall]) +//! - so `__call__` is inherited from the metaclass which is `type` in this case. +//! - `type.__call__` calls `obj = MyClass.__new__(MyClass, *args, **kwargs) == object.__new__(MyClass, *args, **kwargs)` +//! - `MyClass.tp_new` is inherited from the base class which is `object` in this case. +//! - Allocates a new object and does some basic initialization +//! - `type.__call__` calls `MyClass.__init__(obj, *args, **kwargs) == object.__init__(obj, *args, **kwargs)` +//! - `MyClass.tp_init` is inherited from the base class which is `object` in this case. +//! - Does some checks but is essentially a no-op +//! +//! So in general for `class MyClass(BaseClass, metaclass=MetaClass): pass` +//! - `MyClass(*args, **kwargs)` +//! - `Metaclass.__call__(*args, **kwargs)` +//! - `BaseClass.__new__(*args, **kwargs)` +//! - `BaseClass.__init__(*args, **kwargs)` +//! +//! - If `MyClass` defines `__new__` then it must delegate to `super(MyClass, cls).__new__(cls, *args, **kwargs)` to +//! allocate the object. +//! - is is the responsibility of `MyClass` to call `super().__new__()` with the correct arguments. +//! `object.__new__()` does not accept any arguments for example. +//! - If `MyClass` defines `__init__` then it should call `super().__init__()` to recursively initialize +//! the base classes. Again, passing arguments is the responsibility of MyClass. +//! +//! Initialization of a pyo3 `#[pyclass] struct MyClass;` +//! - `MyClass(*args, **kwargs) == MyClass.__call__(*args, **kwargs) == type.__call__(*args, **kwargs)` +//! - Calls `obj = MyClass.__new__(MyClass, *args, **kwargs)` +//! - Calls user defined `#[new]` function, returning a [IntoPyCallbackOutput] which has +//! instances of each user defined struct in the inheritance hierarchy. +//! - Calls [PyClassInitializer::create_class_object_of_type] +//! - Recursively calls back to the base native type. +//! - At the base native type, [PyObjectInit::into_new_object] calls `__new__` for the base native type +//! (passing the [ffi::PyTypeObject] of the most derived type) +//! - Allocates a new python object with enough space to fit the user structs and the native base type data. +//! - Initializes the native base type part of the new object. +//! - Moves the data for the user structs into the appropriate locations in the new python object. +//! - Calls `MyClass.__init__(obj, *args, **kwargs)` +//! - Inherited from native base class +//! +//! ## Notes: +//! - pyo3 classes annotated with `#[pyclass(dict)]` have a `__dict__` attribute. When using the `tp_dictoffset` +//! mechanism instead of `Py_TPFLAGS_MANAGED_DICT` to enable this, the dict is stored in the [PyClassObjectContents] +//! of the most derived type and is set to NULL at construction and initialized to a new dictionary by +//! [ffi::PyObject_GenericGetDict] when first accessed. +//! - The python documentation also mentions 'static' classes which define their [ffi::PyTypeObject] in static/global +//! memory. Builtins like `dict` (`PyDict_Type`) are defined this way but classes defined in python and pyo3 are +//! 'heap' types where the [ffi::PyTypeObject] objects are allocated at runtime. +//! use crate::ffi_ptr_ext::FfiPtrExt; use crate::impl_::callback::IntoPyCallbackOutput; use crate::impl_::pyclass::{PyClassBaseType, PyClassDict, PyClassThreadChecker, PyClassWeakRef}; use crate::impl_::pyclass_init::{PyNativeTypeInitializer, PyObjectInit}; -use crate::types::PyAnyMethods; +use crate::types::{PyAnyMethods, PyDict, PyTuple}; use crate::{ffi, Bound, Py, PyClass, PyResult, Python}; use crate::{ ffi::PyTypeObject, @@ -150,18 +201,22 @@ impl PyClassInitializer { where T: PyClass, { - unsafe { self.create_class_object_of_type(py, T::type_object_raw(py)) } + unsafe { + self.create_class_object_of_type(py, T::type_object_raw(py), &PyTuple::empty(py), None) + } } /// Creates a new class object and initializes it given a typeobject `subtype`. /// /// # Safety /// `subtype` must be a valid pointer to the type object of T or a subclass. - pub(crate) unsafe fn create_class_object_of_type( + pub(crate) unsafe fn create_class_object_of_type<'py>( self, - py: Python<'_>, + py: Python<'py>, target_type: *mut crate::ffi::PyTypeObject, - ) -> PyResult> + args: &Bound<'py, PyTuple>, + kwargs: Option<&Bound<'py, PyDict>>, + ) -> PyResult> where T: PyClass, { @@ -178,7 +233,7 @@ impl PyClassInitializer { PyClassInitializerImpl::New { init, super_init } => (init, super_init), }; - let obj = super_init.into_new_object(py, target_type)?; + let obj = super_init.into_new_object(py, target_type, args, kwargs)?; let part_init: *mut PartiallyInitializedClassObject = obj.cast(); std::ptr::write( @@ -203,8 +258,10 @@ impl PyObjectInit for PyClassInitializer { self, py: Python<'_>, subtype: *mut PyTypeObject, + args: &Bound<'_, PyTuple>, + kwargs: Option<&Bound<'_, PyDict>>, ) -> PyResult<*mut ffi::PyObject> { - self.create_class_object_of_type(py, subtype) + self.create_class_object_of_type(py, subtype, args, kwargs) .map(Bound::into_ptr) } @@ -268,9 +325,13 @@ where #[cfg(all(test, feature = "macros"))] mod tests { - //! See https://github.com/PyO3/pyo3/issues/4452. - - use crate::prelude::*; + use crate::{ + ffi, + impl_::pyclass::PyClassImpl, + prelude::*, + types::{PyDict, PyType}, + PyTypeInfo, + }; #[pyclass(crate = "crate", subclass)] struct BaseClass {} @@ -280,12 +341,81 @@ mod tests { _data: i32, } + /// See https://github.com/PyO3/pyo3/issues/4452. #[test] - #[should_panic] + #[should_panic(expected = "you cannot add a subclass to an existing value")] fn add_subclass_to_py_is_unsound() { Python::with_gil(|py| { let base = Py::new(py, BaseClass {}).unwrap(); let _subclass = PyClassInitializer::from(base).add_subclass(SubClass { _data: 42 }); }); } + + /// Verify the correctness of the documentation describing class initialization. + #[test] + fn empty_class_inherits_expected_slots() { + Python::with_gil(|py| { + let namespace = PyDict::new(py); + py.run( + ffi::c_str!("class EmptyClass: pass"), + Some(&namespace), + Some(&namespace), + ) + .unwrap(); + let empty_class = namespace + .get_item("EmptyClass") + .unwrap() + .unwrap() + .downcast::() + .unwrap() + .as_type_ptr(); + + let object_type = PyAny::type_object(py).as_type_ptr(); + unsafe { + assert_eq!( + ffi::PyType_GetSlot(empty_class, ffi::Py_tp_new), + ffi::PyType_GetSlot(object_type, ffi::Py_tp_new) + ); + assert_eq!( + ffi::PyType_GetSlot(empty_class, ffi::Py_tp_init), + ffi::PyType_GetSlot(object_type, ffi::Py_tp_init) + ); + assert!(ffi::PyType_GetSlot(empty_class, ffi::Py_tp_call).is_null(),); + } + + let base_class = BaseClass::type_object_raw(py); + unsafe { + // tp_new is always set for pyclasses, not inherited + assert_ne!( + ffi::PyType_GetSlot(base_class, ffi::Py_tp_new), + ffi::PyType_GetSlot(object_type, ffi::Py_tp_new) + ); + assert_eq!( + ffi::PyType_GetSlot(base_class, ffi::Py_tp_init), + ffi::PyType_GetSlot(object_type, ffi::Py_tp_init) + ); + assert!(ffi::PyType_GetSlot(base_class, ffi::Py_tp_call).is_null(),); + } + }); + } + + /// Verify the correctness of the documentation describing class initialization. + #[test] + fn managed_dict_initialized_as_expected() { + #[pyclass(crate = "crate", dict)] + struct ClassWithDict {} + + Python::with_gil(|py| { + let obj = Py::new(py, ClassWithDict {}).unwrap(); + let dict_offset = ClassWithDict::dict_offset().unwrap(); + assert!(dict_offset > 0); + unsafe { + let obj_dict = + (obj.as_ptr() as *mut u8).add(dict_offset as usize) as *mut *mut ffi::PyObject; + assert!((*obj_dict).is_null()); + crate::py_run!(py, obj, "obj.__dict__"); + assert!(!(*obj_dict).is_null()); + } + }); + } } diff --git a/src/sealed.rs b/src/sealed.rs index 0a2846b134a..9fec6769b80 100644 --- a/src/sealed.rs +++ b/src/sealed.rs @@ -51,7 +51,10 @@ impl Sealed for AddClassToModule {} impl Sealed for PyMethodDef {} impl Sealed for ModuleDef {} -impl Sealed for PyNativeTypeInitializer {} +impl Sealed + for PyNativeTypeInitializer +{ +} impl Sealed for PyClassInitializer {} impl Sealed for std::sync::Once {} diff --git a/src/types/any.rs b/src/types/any.rs index d060c187631..689caf42bde 100644 --- a/src/types/any.rs +++ b/src/types/any.rs @@ -51,6 +51,8 @@ impl crate::impl_::pyclass::PyClassBaseType for PyAny { type BaseNativeType = PyAny; type Initializer = crate::impl_::pyclass_init::PyNativeTypeInitializer; type PyClassMutability = crate::pycell::impl_::ImmutableClass; + // `object.__new__` should be called with only the type of object to create, without any other arguments. + const NEW_ACCEPTS_ARGUMENTS: bool = false; } /// This trait represents the Python APIs which are usable on all Python objects.