diff --git a/.github/workflows/build_langsmith_pyo3_wheels.yml b/.github/workflows/build_langsmith_pyo3_wheels.yml index d844a48ac..07afd3c3a 100644 --- a/.github/workflows/build_langsmith_pyo3_wheels.yml +++ b/.github/workflows/build_langsmith_pyo3_wheels.yml @@ -36,8 +36,7 @@ permissions: contents: read env: - # TODO: Switch off of nightly Rust. - RUST_VERSION: nightly + RUST_VERSION: stable SUPPORTED_PYTHON_VERSIONS: 'python3.8 python3.9 python3.10 python3.11 python3.12' WORKING_DIRECTORY: rust/crates/langsmith_pyo3 diff --git a/rust/crates/langsmith_pyo3/README.md b/rust/crates/langsmith_pyo3/README.md index 9d2086015..a7f4e7d2a 100644 --- a/rust/crates/langsmith_pyo3/README.md +++ b/rust/crates/langsmith_pyo3/README.md @@ -23,5 +23,3 @@ To run install these bindings into another virtualenv (e.g. to run benchmarks), activate that virtualenv, then `cd` to this directory and run `uvx maturin develop --release`. When that command completes, the virtualenv will have an optimized build of `langsmith_pyo3` installed. - -TODO: Move the `.github/workflows` CI workflow for testing the maturin build to the top level of the repo. diff --git a/rust/crates/langsmith_pyo3/src/lib.rs b/rust/crates/langsmith_pyo3/src/lib.rs index 7d3e6f236..c0786102f 100644 --- a/rust/crates/langsmith_pyo3/src/lib.rs +++ b/rust/crates/langsmith_pyo3/src/lib.rs @@ -1,4 +1,3 @@ -#![feature(vec_into_raw_parts)] #![allow(deprecated)] use pyo3::{pymodule, types::PyModule, Bound, PyResult, Python}; diff --git a/rust/crates/langsmith_pyo3/src/py_run.rs b/rust/crates/langsmith_pyo3/src/py_run.rs index 6f911a2f2..69d0d692e 100644 --- a/rust/crates/langsmith_pyo3/src/py_run.rs +++ b/rust/crates/langsmith_pyo3/src/py_run.rs @@ -8,9 +8,6 @@ use crate::{errors::TracingClientError, serialization}; // TODO: consider interning all the strings here -// TODO: consider replacing `String` with `Box`, and `Vec` with `Box<[T]>`, -// since none of them are growable and we can make them more compact in memory - #[derive(Debug)] pub struct RunCreateExtended(langsmith_tracing_client::client::RunCreateExtended); diff --git a/rust/crates/langsmith_pyo3/src/serialization/writer.rs b/rust/crates/langsmith_pyo3/src/serialization/writer.rs index f4265495a..2d0136d91 100644 --- a/rust/crates/langsmith_pyo3/src/serialization/writer.rs +++ b/rust/crates/langsmith_pyo3/src/serialization/writer.rs @@ -1,5 +1,15 @@ use std::io::Error; +/// Writer that points directly to a `[u8]` buffer. +/// +/// # Safety +/// The external interface of this type is fully safe. +/// +/// An internal invariant is that the `(buf, len, cap)` fields must be equivalent to a valid `Vec` +/// both at the start and conclusion of any method call: +/// - `self.buf` points to valid, initialized memory to which `Self` has exclusive access +/// - `self.len` is a legal offset that remains in bounds when used with `self.buf`. +/// - `self.cap` accurately shows how much capacity the `self.buf` memory region has. pub(super) struct BufWriter { buf: *mut u8, len: usize, @@ -11,7 +21,23 @@ impl BufWriter { pub(super) fn new() -> Self { let buffer = Vec::with_capacity(Self::DEFAULT_CAPACITY); - let (buf, len, cap) = buffer.into_raw_parts(); + + let (buf, len, cap) = { + // Prevent the `Vec` from being dropped at the end of this scope. + let mut buffer = std::mem::ManuallyDrop::new(buffer); + + // Get the `Vec`'s components. + let buf = buffer.as_mut_ptr(); + let len = buffer.len(); + let cap = buffer.capacity(); + + // We now own the `Vec`'s backing data. + // The `Vec` goes out of scope but will not be dropped + // due to being wrapped in `ManuallyDrop`. + (buf, len, cap) + }; + + // SAFETY: These values are derived from a valid `Vec` that has not been dropped. Self { buf, len, cap } } @@ -51,10 +77,27 @@ impl BufWriter { // SAFETY: The buffer used to be a vector, and is exclusively owned by us. // The `&mut self` here guarantees there can't be another mutable reference to it. // It's safe to turn it back into a `Vec` and ask the `Vec` to resize itself. + // After resizing, we deconstruct the `Vec` *and* ensure it isn't dropped, + // meaning that the memory is still live and not use-after-free'd. unsafe { - let mut v = Vec::from_raw_parts(self.buf, self.len, self.cap); - v.reserve(cap - self.cap); - (self.buf, self.len, self.cap) = v.into_raw_parts(); + // SAFETY: `self`'s `(buf, len, cap)` are no longer valid for accessing data + // after the next line, until they are reassigned new values. + let mut buffer = Vec::from_raw_parts(self.buf, self.len, self.cap); + + buffer.reserve(cap - self.cap); + + // Prevent the `Vec` from being dropped at the end of this scope. + let mut buffer = std::mem::ManuallyDrop::new(buffer); + + // Get the `Vec`'s components. + let buf = buffer.as_mut_ptr(); + let len = buffer.len(); + let cap = buffer.capacity(); + + // SAFETY: `self`'s `(buf, len, cap)` values are valid again from this point onward. + // We own the `Vec`'s backing data. The `Vec` goes out of scope but + // will not be dropped due to being wrapped in `ManuallyDrop`. + (self.buf, self.len, self.cap) = (buf, len, cap); } } }