Improve Tuid (#8999)
### Related
* Part of #8992

### What
Improve `re_tuid::Tuid`, and by extension `RowId` and `ChunkId` (which
are just typesafe wrappers around `Tuid`)

### Details
* Implement `from_str` for `Tuid`/`RowId`/`ChunkId` (round-trip sketched below)
* Slightly modify how `Tuid` is formatted as a string (in a backwards/forwards compatible way!)
* Implement `bytemuck` support behind a new opt-in `bytemuck` feature
* Align to bytes (see below)
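
For illustration, here is a rough sketch of the round-trip the new `from_str` enables (hypothetical usage, not code from this diff; `Tuid::new()` is only used here to get a value to print and parse back):

```rust
// Sketch only — not part of this PR.
fn tuid_string_roundtrip() {
    let id = re_tuid::Tuid::new();
    let text = id.to_string(); // e.g. "182342300C5F8C327a7b4a6e5a379ac4"
    let parsed: re_tuid::Tuid = text.parse().unwrap();
    assert!(parsed == id);
}
```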

### Change `Tuid` alignment from `8` to `1`
This will allow us to cast raw bytes into a `&[RowId]`.

`arrow-rs` already aligns all allocations to 64 bytes, so _in theory_ nothing
stops us from doing this already, but the explicit 1-byte alignment is the
"better safe than sorry" approach.

`uuid::Uuid` is also 1-byte aligned, FWIW.

* `Tuid::new` becomes ~7% slower
* `Tuid::cmp` is unaffected
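
For illustration, a sketch of the zero-copy cast that the 1-byte alignment is meant to unlock (hypothetical usage, assuming the new `bytemuck` feature is enabled; `raw` stands in for e.g. an arrow-rs buffer):

```rust
// Sketch only — with `align(1)` and `bytemuck::AnyBitPattern`, a byte buffer can be
// reinterpreted in place; `cast_slice` then only checks that the length is a
// multiple of `size_of::<Tuid>()` (16 bytes).
fn tuids_from_raw(raw: &[u8]) -> &[re_tuid::Tuid] {
    bytemuck::cast_slice(raw)
}
```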

---------

Co-authored-by: Clement Rey <[email protected]>
emilk and teh-cmc authored Feb 12, 2025
1 parent c795a0b commit a4d3725
Showing 7 changed files with 133 additions and 57 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock
@@ -6646,10 +6646,12 @@ dependencies = [
name = "re_tuid"
version = "0.23.0-alpha.1+dev"
dependencies = [
"bytemuck",
"criterion",
"document-features",
"getrandom",
"once_cell",
"rand",
"re_byte_size",
"serde",
"web-time",
@@ -28,7 +28,7 @@ ChunkStore {
│ │ kind: "control"is_sorted: "true"is_sorted: "true"kind: "data"kind: "data" │ │
│ │ ┆ kind: "index"kind: "index" ┆ ┆ │ │
│ ╞══════════════════════════════════╪════════════════════════╪═══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ │
│ │ 0000000067816A6BB4B8C1254D40007B12025-01-10T18:43:42.123456789 ┆ [0, 1, 2] ┆ [0, 1, 2] │ │
│ │ 0000000067816A6Bb4b8c1254d40007b12025-01-10T18:43:42.123456789 ┆ [0, 1, 2] ┆ [0, 1, 2] │ │
│ └──────────────────────────────────┴────────────────────────┴───────────────────────────────┴──────────────────────────────┴──────────────────────────────┘ │
└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
]
9 changes: 4 additions & 5 deletions crates/store/re_sorbet/src/chunk_schema.rs
@@ -216,13 +216,12 @@ impl TryFrom<&ArrowSchema> for ChunkSchema {
let ArrowSchema { metadata, fields } = arrow_schema;

let chunk_id = {
let chunk_id = metadata.get_or_err("rerun.id")?;
let chunk_id = u128::from_str_radix(chunk_id, 16).map_err(|err| {
let chunk_id_str = metadata.get_or_err("rerun.id")?;
chunk_id_str.parse().map_err(|err| {
InvalidChunkSchema::custom(format!(
"Failed to deserialize chunk id {chunk_id:?}: {err}"
"Failed to deserialize chunk id {chunk_id_str:?}: {err}"
))
})?;
ChunkId::from_u128(chunk_id)
})?
};

let entity_path = EntityPath::parse_forgiving(metadata.get_or_err("rerun.entity_path")?);
38 changes: 16 additions & 22 deletions crates/store/re_types_core/src/id.rs
@@ -35,6 +35,14 @@ impl std::fmt::Display for ChunkId {
}
}

impl std::str::FromStr for ChunkId {
type Err = std::num::ParseIntError;

fn from_str(s: &str) -> Result<Self, Self::Err> {
re_tuid::Tuid::from_str(s).map(Self)
}
}

impl ChunkId {
pub const ZERO: Self = Self(re_tuid::Tuid::ZERO);
pub const MAX: Self = Self(re_tuid::Tuid::MAX);
@@ -69,21 +77,10 @@ impl ChunkId {
Self(self.0.incremented_by(n))
}

/// When the `ChunkId` was created, in nanoseconds since unix epoch.
#[inline]
pub fn nanoseconds_since_epoch(&self) -> u64 {
self.0.nanoseconds_since_epoch()
}

#[inline]
pub fn from_u128(id: u128) -> Self {
Self(re_tuid::Tuid::from_u128(id))
}

#[inline]
pub fn as_u128(&self) -> u128 {
self.0.as_u128()
}
}

impl re_byte_size::SizeBytes for ChunkId {
@@ -167,6 +164,14 @@ impl std::fmt::Display for RowId {
}
}

impl std::str::FromStr for RowId {
type Err = std::num::ParseIntError;

fn from_str(s: &str) -> Result<Self, Self::Err> {
re_tuid::Tuid::from_str(s).map(Self)
}
}
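// --- Example sketch (not part of this diff) ---------------------------------
// With the `FromStr` impls above, `RowId` and `ChunkId` now round-trip through
// their string form, which is what lets `chunk_schema.rs` (earlier in this
// commit) replace `u128::from_str_radix` + `ChunkId::from_u128` with a plain
// `.parse()`. Hypothetical usage:
//
//     let row_id = RowId::new();
//     let parsed: RowId = row_id.to_string().parse().unwrap();
//     assert!(parsed == row_id);
// -----------------------------------------------------------------------------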

impl RowId {
pub const ZERO: Self = Self(re_tuid::Tuid::ZERO);
pub const MAX: Self = Self(re_tuid::Tuid::MAX);
@@ -206,21 +211,10 @@ impl RowId {
Self(self.0.incremented_by(n))
}

/// When the `RowId` was created, in nanoseconds since unix epoch.
#[inline]
pub fn nanoseconds_since_epoch(&self) -> u64 {
self.0.nanoseconds_since_epoch()
}

#[inline]
pub fn from_u128(id: u128) -> Self {
Self(re_tuid::Tuid::from_u128(id))
}

#[inline]
pub fn as_u128(&self) -> u128 {
self.0.as_u128()
}
}

impl re_byte_size::SizeBytes for RowId {
5 changes: 5 additions & 0 deletions crates/utils/re_tuid/Cargo.toml
@@ -22,6 +22,9 @@ all-features = true
[features]
default = []

## Enable bytemuck support.
bytemuck = ["dep:bytemuck"]

## Enable (de)serialization using serde.
serde = ["dep:serde"]

@@ -35,10 +38,12 @@ once_cell.workspace = true
web-time.workspace = true

# Optional dependencies
bytemuck = { workspace = true, optional = true }
serde = { workspace = true, features = ["derive"], optional = true }

[dev-dependencies]
criterion.workspace = true
rand = { workspace = true, features = ["std", "std_rng"] }

[lib]
bench = false
8 changes: 8 additions & 0 deletions crates/utils/re_tuid/benches/bench_tuid.rs
@@ -6,6 +6,14 @@ fn bench_tuid(c: &mut Criterion) {
group.bench_function("Tuid::new", |b| {
b.iter(|| criterion::black_box(re_tuid::Tuid::new()));
});

group.throughput(criterion::Throughput::Elements(1_000));
group.bench_function("Tuid::cmp", |b| {
use rand::prelude::*;
let mut ids = (0..2_000).map(|_| re_tuid::Tuid::new()).collect::<Vec<_>>();
ids.shuffle(&mut rand::thread_rng());
b.iter(|| criterion::black_box(ids[0..1_000].cmp(&ids[1_000..2_000])));
});
}

criterion_group!(benches, bench_tuid);
126 changes: 97 additions & 29 deletions crates/utils/re_tuid/src/lib.rs
@@ -6,15 +6,36 @@
#![doc = document_features::document_features!()]
//!
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
/// TUID: Time-based Unique Identifier.
///
/// Time-ordered globally unique 128-bit identifiers.
#[repr(C, align(1))]
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "bytemuck", derive(bytemuck::AnyBitPattern))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct Tuid {
/// Approximate nanoseconds since epoch.
time_ns: u64,
/// A LE u64 encoded as bytes to keep the alignment of `Tuid` to 1.
time_ns: [u8; 8],

/// Initialized to something random on each thread,
/// then incremented for each new [`Tuid`] being allocated.
inc: u64,
/// A LE u64 encoded as bytes to keep the alignment of `Tuid` to 1.
inc: [u8; 8],
}

impl Ord for Tuid {
#[inline]
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.as_u128().cmp(&other.as_u128())
}
}

impl PartialOrd for Tuid {
#[inline]
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
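// --- Example sketch (not part of this diff) ----------------------------------
// Why `Ord`/`PartialOrd` are hand-written now: the fields are little-endian byte
// arrays, and a derived ordering would compare those arrays lexicographically
// (least-significant byte first), which does not match numeric/time order.
// Routing `cmp` through `as_u128()` keeps the old ordering. For instance:
//
//     assert!(256_u64 > 1_u64);
//     assert!(256_u64.to_le_bytes() < 1_u64.to_le_bytes()); // byte-wise order flips
// ------------------------------------------------------------------------------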

impl Tuid {
@@ -24,9 +45,28 @@ impl Tuid {
pub const ARROW_EXTENSION_NAME: &'static str = "rerun.datatypes.TUID";
}

/// Formats the [`Tuid`] as a hex string.
///
/// The format uses upper case for the first 16 hex digits, and lower case for the last 16 hex digits.
/// This is to make it easily distinguished from other hex strings.
///
/// Example: `182342300C5F8C327a7b4a6e5a379ac4`
impl std::fmt::Display for Tuid {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:032X}", self.as_u128())
write!(
f,
"{:016X}{:016x}",
self.nanoseconds_since_epoch(),
self.inc()
)
}
}

impl std::str::FromStr for Tuid {
type Err = std::num::ParseIntError;

fn from_str(s: &str) -> Result<Self, Self::Err> {
u128::from_str_radix(s, 16).map(Self::from_u128)
}
}
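// --- Example sketch (not part of this diff) ----------------------------------
// The "backwards/forwards compatible" claim from the PR description: `from_str`
// is plain hex parsing, so an old all-uppercase string and the new mixed-case
// form of the same id parse to the same value.
//
//     let new_style: Tuid = "182342300C5F8C327a7b4a6e5a379ac4".parse().unwrap();
//     let old_style: Tuid = "182342300C5F8C327A7B4A6E5A379AC4".parse().unwrap();
//     assert!(new_style == old_style);
// ------------------------------------------------------------------------------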

Expand All @@ -52,12 +92,15 @@ impl<'a> From<&'a Tuid> for std::borrow::Cow<'a, Tuid> {

impl Tuid {
/// All zeroes.
pub const ZERO: Self = Self { time_ns: 0, inc: 0 };
pub const ZERO: Self = Self {
time_ns: [0; 8],
inc: [0; 8],
};

/// All ones.
pub const MAX: Self = Self {
time_ns: u64::MAX,
inc: u64::MAX,
time_ns: u64::MAX.to_le_bytes(),
inc: u64::MAX.to_le_bytes(),
};

/// Create a new unique [`Tuid`] based on the current time.
@@ -67,24 +110,21 @@ impl Tuid {
use std::cell::RefCell;

thread_local! {
pub static LATEST_TUID: RefCell<Tuid> = RefCell::new(Tuid{
time_ns: monotonic_nanos_since_epoch(),
pub static LATEST_TUID: RefCell<Tuid> = RefCell::new(Tuid::from_nanos_and_inc(
monotonic_nanos_since_epoch(),

// Leave top bit at zero so we have plenty of room to grow.
inc: random_u64() & !(1_u64 << 63),
});
random_u64() & !(1_u64 << 63),
));
}

LATEST_TUID.with(|latest_tuid| {
let mut latest = latest_tuid.borrow_mut();

let new = Self {
time_ns: monotonic_nanos_since_epoch(),
inc: latest.inc + 1,
};
let new = Self::from_nanos_and_inc(monotonic_nanos_since_epoch(), latest.inc() + 1);

debug_assert!(
latest.time_ns <= new.time_ns,
latest.nanoseconds_since_epoch() <= new.nanoseconds_since_epoch(),
"Time should be monotonically increasing"
);

@@ -98,36 +138,36 @@ impl Tuid {
/// The first should be nano-seconds since epoch.
#[inline]
pub fn from_nanos_and_inc(time_ns: u64, inc: u64) -> Self {
Self { time_ns, inc }
Self {
time_ns: time_ns.to_le_bytes(),
inc: inc.to_le_bytes(),
}
}

#[inline]
pub fn from_u128(id: u128) -> Self {
Self {
time_ns: (id >> 64) as u64,
inc: (id & (!0 >> 64)) as u64,
}
Self::from_nanos_and_inc((id >> 64) as u64, (id & (!0 >> 64)) as u64)
}

#[inline]
pub fn as_u128(&self) -> u128 {
((self.time_ns as u128) << 64) | (self.inc as u128)
((self.nanoseconds_since_epoch() as u128) << 64) | (self.inc() as u128)
}

/// Approximate nanoseconds since unix epoch.
///
/// The upper 64 bits of the [`Tuid`].
#[inline]
pub fn nanoseconds_since_epoch(&self) -> u64 {
self.time_ns
u64::from_le_bytes(self.time_ns)
}

/// The increment part of the [`Tuid`].
///
/// The lower 64 bits of the [`Tuid`].
#[inline]
pub fn inc(&self) -> u64 {
self.inc
u64::from_le_bytes(self.inc)
}

/// Returns the next logical [`Tuid`].
@@ -143,7 +183,7 @@ impl Tuid {

Self {
time_ns,
inc: inc.wrapping_add(1),
inc: u64::from_le_bytes(inc).wrapping_add(1).to_le_bytes(),
}
}

@@ -160,7 +200,7 @@ impl Tuid {
let Self { time_ns, inc } = *self;
Self {
time_ns,
inc: inc.wrapping_add(n),
inc: u64::from_le_bytes(inc).wrapping_add(n).to_le_bytes(),
}
}

@@ -231,12 +271,40 @@ fn test_tuid() {
}

let num = 100_000;
let ids: Vec<Tuid> = (0..num).map(|_| Tuid::new()).collect();
let mut ids = Vec::with_capacity(num);
ids.push(Tuid::ZERO);
ids.push(Tuid::from_nanos_and_inc(123_456, 789_123));
ids.push(Tuid::from_nanos_and_inc(123_456, u64::MAX));
ids.extend((0..num - 5).map(|_| Tuid::new()));
ids.push(Tuid::from_nanos_and_inc(u64::MAX, 1));
ids.push(Tuid::MAX);

assert!(is_sorted(&ids));
assert_eq!(ids.iter().copied().collect::<HashSet::<Tuid>>().len(), num);
assert_eq!(ids.iter().copied().collect::<BTreeSet::<Tuid>>().len(), num);

for id in ids {
assert_eq!(id, Tuid::from_u128(id.as_u128()));
for &tuid in &ids {
assert_eq!(tuid, Tuid::from_u128(tuid.as_u128()));
assert_eq!(tuid, tuid.to_string().parse().unwrap());
}

let id_strings: Vec<String> = ids.iter().map(|id| id.to_string()).collect();
assert!(
is_sorted(&id_strings),
"Ids should sort the same when converted to strings"
);
}

#[test]
fn test_tuid_size_and_alignment() {
assert_eq!(std::mem::size_of::<Tuid>(), 16);
assert_eq!(std::mem::align_of::<Tuid>(), 1);
}

#[test]
fn test_tuid_formatting() {
assert_eq!(
Tuid::from_u128(0x182342300c5f8c327a7b4a6e5a379ac4).to_string(),
"182342300C5F8C327a7b4a6e5a379ac4"
);
}
