From 91c14c7ee912d3d9b86ad8655098bac67e919df7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20K=C3=B6ppe?= Date: Tue, 19 Nov 2024 13:56:46 +0000 Subject: [PATCH] fix: only consider salient bytes in sharecommon eq, hash (#5840) This PR changes `lean_sharecommon_{eq,hash}` to only consider the salient bytes of an object, and not any bytes of any unspecified/uninitialized unused capacity. Accessing uninitialized storage results in undefined behaviour. This does not seem to have any semantics disadvantages: If objects compare equal after this change, their salient bytes are still equal. By contrast, if the actual identity of allocations needs to be distinguished, that can be done by just comparing pointers to the storage. If we wanted to retain the current logic, we would need initialize the otherwise unused parts to some specific value to avoid the undefined behaviour. Closes #5831 --- src/include/lean/lean.h | 14 ++++++++++++++ src/runtime/object.cpp | 22 ++++++++++++++++++++++ src/runtime/sharecommon.cpp | 6 +++--- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/src/include/lean/lean.h b/src/include/lean/lean.h index 1ad7aac78389..c0b127058f43 100644 --- a/src/include/lean/lean.h +++ b/src/include/lean/lean.h @@ -406,6 +406,13 @@ static inline unsigned lean_ptr_other(lean_object * o) { small objects is a multiple of LEAN_OBJECT_SIZE_DELTA */ LEAN_EXPORT size_t lean_object_byte_size(lean_object * o); +/* Returns the size of the salient part of an object's storage, + i.e. the parts that contribute to the value representation; + padding or unused capacity is excluded. Operations that read + from an object's storage must only access these parts, since + the non-salient parts may not be initialized. */ +LEAN_EXPORT size_t lean_object_data_byte_size(lean_object * o); + static inline bool lean_is_mt(lean_object * o) { return o->m_rc < 0; } @@ -695,6 +702,9 @@ static inline size_t lean_array_capacity(b_lean_obj_arg o) { return lean_to_arra static inline size_t lean_array_byte_size(lean_object * o) { return sizeof(lean_array_object) + sizeof(void*)*lean_array_capacity(o); } +static inline size_t lean_array_data_byte_size(lean_object * o) { + return sizeof(lean_array_object) + sizeof(void*)*lean_array_size(o); +} static inline lean_object ** lean_array_cptr(lean_object * o) { return lean_to_array(o)->m_data; } static inline void lean_array_set_size(u_lean_obj_arg o, size_t sz) { assert(lean_is_array(o)); @@ -852,6 +862,9 @@ static inline size_t lean_sarray_byte_size(lean_object * o) { return sizeof(lean_sarray_object) + lean_sarray_elem_size(o)*lean_sarray_capacity(o); } static inline size_t lean_sarray_size(b_lean_obj_arg o) { return lean_to_sarray(o)->m_size; } +static inline size_t lean_sarray_data_byte_size(lean_object * o) { + return sizeof(lean_sarray_object) + lean_sarray_elem_size(o)*lean_sarray_size(o); +} static inline void lean_sarray_set_size(u_lean_obj_arg o, size_t sz) { assert(lean_is_exclusive(o)); assert(sz <= lean_sarray_capacity(o)); @@ -1013,6 +1026,7 @@ static inline char const * lean_string_cstr(b_lean_obj_arg o) { } static inline size_t lean_string_size(b_lean_obj_arg o) { return lean_to_string(o)->m_size; } static inline size_t lean_string_len(b_lean_obj_arg o) { return lean_to_string(o)->m_length; } +static inline size_t lean_string_data_byte_size(lean_object * o) { return sizeof(lean_string_object) + lean_string_size(o); } LEAN_EXPORT lean_obj_res lean_string_push(lean_obj_arg s, uint32_t c); LEAN_EXPORT lean_obj_res lean_string_append(lean_obj_arg s1, b_lean_obj_arg s2); static inline lean_obj_res lean_string_length(b_lean_obj_arg s) { return lean_box(lean_string_len(s)); } diff --git a/src/runtime/object.cpp b/src/runtime/object.cpp index 9921c3db958d..24444786def4 100644 --- a/src/runtime/object.cpp +++ b/src/runtime/object.cpp @@ -172,6 +172,28 @@ extern "C" LEAN_EXPORT size_t lean_object_byte_size(lean_object * o) { } } +extern "C" LEAN_EXPORT size_t lean_object_data_byte_size(lean_object * o) { + if (o->m_cs_sz == 0) { + /* Recall that multi-threaded, single-threaded and persistent objects are stored in the heap. + Persistent objects are multi-threaded and/or single-threaded that have been "promoted" to + a persistent status. */ + switch (lean_ptr_tag(o)) { + case LeanArray: return lean_array_data_byte_size(o); + case LeanScalarArray: return lean_sarray_data_byte_size(o); + case LeanString: return lean_string_data_byte_size(o); + default: return lean_small_object_size(o); + } + } else { + /* See comment at `lean_set_non_heap_header`, for small objects we store the object size in the RC field. */ + switch (lean_ptr_tag(o)) { + case LeanArray: return lean_array_data_byte_size(o); + case LeanScalarArray: return lean_sarray_data_byte_size(o); + case LeanString: return lean_string_data_byte_size(o); + default: return o->m_cs_sz; + } + } +} + static inline void lean_dealloc(lean_object * o, size_t sz) { #ifdef LEAN_SMALL_ALLOCATOR dealloc(o, sz); diff --git a/src/runtime/sharecommon.cpp b/src/runtime/sharecommon.cpp index 53305df97d60..67951044d0fb 100644 --- a/src/runtime/sharecommon.cpp +++ b/src/runtime/sharecommon.cpp @@ -13,8 +13,8 @@ namespace lean { extern "C" LEAN_EXPORT uint8 lean_sharecommon_eq(b_obj_arg o1, b_obj_arg o2) { lean_assert(!lean_is_scalar(o1)); lean_assert(!lean_is_scalar(o2)); - size_t sz1 = lean_object_byte_size(o1); - size_t sz2 = lean_object_byte_size(o2); + size_t sz1 = lean_object_data_byte_size(o1); + size_t sz2 = lean_object_data_byte_size(o2); if (sz1 != sz2) return false; // compare relevant parts of the header if (lean_ptr_tag(o1) != lean_ptr_tag(o2)) return false; @@ -27,7 +27,7 @@ extern "C" LEAN_EXPORT uint8 lean_sharecommon_eq(b_obj_arg o1, b_obj_arg o2) { extern "C" LEAN_EXPORT uint64_t lean_sharecommon_hash(b_obj_arg o) { lean_assert(!lean_is_scalar(o)); - size_t sz = lean_object_byte_size(o); + size_t sz = lean_object_data_byte_size(o); size_t header_sz = sizeof(lean_object); // hash relevant parts of the header unsigned init = hash(lean_ptr_tag(o), lean_ptr_other(o));