diff --git a/Cargo.lock b/Cargo.lock index 0f89a824864b..8d078bfbbb19 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6646,10 +6646,12 @@ dependencies = [ name = "re_tuid" version = "0.23.0-alpha.1+dev" dependencies = [ + "bytemuck", "criterion", "document-features", "getrandom", "once_cell", + "rand", "re_byte_size", "serde", "web-time", diff --git a/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap b/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap index eed4b3c5ee97..3ed50160c31a 100644 --- a/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap +++ b/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap @@ -28,7 +28,7 @@ ChunkStore { │ │ kind: "control" ┆ is_sorted: "true" ┆ is_sorted: "true" ┆ kind: "data" ┆ kind: "data" │ │ │ │ ┆ kind: "index" ┆ kind: "index" ┆ ┆ │ │ │ ╞══════════════════════════════════╪════════════════════════╪═══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ │ - │ │ 0000000067816A6BB4B8C1254D40007B ┆ 1 ┆ 2025-01-10T18:43:42.123456789 ┆ [0, 1, 2] ┆ [0, 1, 2] │ │ + │ │ 0000000067816A6Bb4b8c1254d40007b ┆ 1 ┆ 2025-01-10T18:43:42.123456789 ┆ [0, 1, 2] ┆ [0, 1, 2] │ │ │ └──────────────────────────────────┴────────────────────────┴───────────────────────────────┴──────────────────────────────┴──────────────────────────────┘ │ └─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ] diff --git a/crates/store/re_sorbet/src/chunk_schema.rs b/crates/store/re_sorbet/src/chunk_schema.rs index b090e76bc6bd..7ab91dc2d4cc 100644 --- a/crates/store/re_sorbet/src/chunk_schema.rs +++ b/crates/store/re_sorbet/src/chunk_schema.rs @@ -216,13 +216,12 @@ impl TryFrom<&ArrowSchema> for ChunkSchema { let ArrowSchema { metadata, fields } = arrow_schema; let chunk_id = { - let chunk_id = metadata.get_or_err("rerun.id")?; - let chunk_id = u128::from_str_radix(chunk_id, 16).map_err(|err| { + let chunk_id_str = metadata.get_or_err("rerun.id")?; + chunk_id_str.parse().map_err(|err| { InvalidChunkSchema::custom(format!( - "Failed to deserialize chunk id {chunk_id:?}: {err}" + "Failed to deserialize chunk id {chunk_id_str:?}: {err}" )) - })?; - ChunkId::from_u128(chunk_id) + })? }; let entity_path = EntityPath::parse_forgiving(metadata.get_or_err("rerun.entity_path")?); diff --git a/crates/store/re_types_core/src/id.rs b/crates/store/re_types_core/src/id.rs index 0b5ae00f61b5..354606460e44 100644 --- a/crates/store/re_types_core/src/id.rs +++ b/crates/store/re_types_core/src/id.rs @@ -35,6 +35,14 @@ impl std::fmt::Display for ChunkId { } } +impl std::str::FromStr for ChunkId { + type Err = std::num::ParseIntError; + + fn from_str(s: &str) -> Result { + re_tuid::Tuid::from_str(s).map(Self) + } +} + impl ChunkId { pub const ZERO: Self = Self(re_tuid::Tuid::ZERO); pub const MAX: Self = Self(re_tuid::Tuid::MAX); @@ -69,21 +77,10 @@ impl ChunkId { Self(self.0.incremented_by(n)) } - /// When the `ChunkId` was created, in nanoseconds since unix epoch. - #[inline] - pub fn nanoseconds_since_epoch(&self) -> u64 { - self.0.nanoseconds_since_epoch() - } - #[inline] pub fn from_u128(id: u128) -> Self { Self(re_tuid::Tuid::from_u128(id)) } - - #[inline] - pub fn as_u128(&self) -> u128 { - self.0.as_u128() - } } impl re_byte_size::SizeBytes for ChunkId { @@ -167,6 +164,14 @@ impl std::fmt::Display for RowId { } } +impl std::str::FromStr for RowId { + type Err = std::num::ParseIntError; + + fn from_str(s: &str) -> Result { + re_tuid::Tuid::from_str(s).map(Self) + } +} + impl RowId { pub const ZERO: Self = Self(re_tuid::Tuid::ZERO); pub const MAX: Self = Self(re_tuid::Tuid::MAX); @@ -206,21 +211,10 @@ impl RowId { Self(self.0.incremented_by(n)) } - /// When the `RowId` was created, in nanoseconds since unix epoch. - #[inline] - pub fn nanoseconds_since_epoch(&self) -> u64 { - self.0.nanoseconds_since_epoch() - } - #[inline] pub fn from_u128(id: u128) -> Self { Self(re_tuid::Tuid::from_u128(id)) } - - #[inline] - pub fn as_u128(&self) -> u128 { - self.0.as_u128() - } } impl re_byte_size::SizeBytes for RowId { diff --git a/crates/utils/re_tuid/Cargo.toml b/crates/utils/re_tuid/Cargo.toml index d9f5ed2f8e0c..86ca8e394101 100644 --- a/crates/utils/re_tuid/Cargo.toml +++ b/crates/utils/re_tuid/Cargo.toml @@ -22,6 +22,9 @@ all-features = true [features] default = [] +## Enable bytemuck support. +bytemuck = ["dep:bytemuck"] + ## Enable (de)serialization using serde. serde = ["dep:serde"] @@ -35,10 +38,12 @@ once_cell.workspace = true web-time.workspace = true # Optional dependencies +bytemuck = { workspace = true, optional = true } serde = { workspace = true, features = ["derive"], optional = true } [dev-dependencies] criterion.workspace = true +rand = { workspace = true, features = ["std", "std_rng"] } [lib] bench = false diff --git a/crates/utils/re_tuid/benches/bench_tuid.rs b/crates/utils/re_tuid/benches/bench_tuid.rs index 369a251ac641..811b67dbf2fd 100644 --- a/crates/utils/re_tuid/benches/bench_tuid.rs +++ b/crates/utils/re_tuid/benches/bench_tuid.rs @@ -6,6 +6,14 @@ fn bench_tuid(c: &mut Criterion) { group.bench_function("Tuid::new", |b| { b.iter(|| criterion::black_box(re_tuid::Tuid::new())); }); + + group.throughput(criterion::Throughput::Elements(1_000)); + group.bench_function("Tuid::cmp", |b| { + use rand::prelude::*; + let mut ids = (0..2_000).map(|_| re_tuid::Tuid::new()).collect::>(); + ids.shuffle(&mut rand::thread_rng()); + b.iter(|| criterion::black_box(ids[0..1_000].cmp(&ids[1_000..2_000]))); + }); } criterion_group!(benches, bench_tuid); diff --git a/crates/utils/re_tuid/src/lib.rs b/crates/utils/re_tuid/src/lib.rs index 557da26f085a..40a15525ee20 100644 --- a/crates/utils/re_tuid/src/lib.rs +++ b/crates/utils/re_tuid/src/lib.rs @@ -6,15 +6,36 @@ #![doc = document_features::document_features!()] //! -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +/// TUID: Time-based Unique Identifier. +/// +/// Time-ordered globally unique 128-bit identifiers. +#[repr(C, align(1))] +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "bytemuck", derive(bytemuck::AnyBitPattern))] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] pub struct Tuid { /// Approximate nanoseconds since epoch. - time_ns: u64, + /// A LE u64 encoded as bytes to keep the alignment of `Tuid` to 1. + time_ns: [u8; 8], /// Initialized to something random on each thread, /// then incremented for each new [`Tuid`] being allocated. - inc: u64, + /// A LE u64 encoded as bytes to keep the alignment of `Tuid` to 1. + inc: [u8; 8], +} + +impl Ord for Tuid { + #[inline] + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.as_u128().cmp(&other.as_u128()) + } +} + +impl PartialOrd for Tuid { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } } impl Tuid { @@ -24,9 +45,28 @@ impl Tuid { pub const ARROW_EXTENSION_NAME: &'static str = "rerun.datatypes.TUID"; } +/// Formats the [`Tuid`] as a hex string. +/// +/// The format uses upper case for the first 16 hex digits, and lower case for the last 16 hex digits. +/// This is to make it easily distinguished from other hex strings. +/// +/// Example: `182342300C5F8C327a7b4a6e5a379ac4` impl std::fmt::Display for Tuid { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:032X}", self.as_u128()) + write!( + f, + "{:016X}{:016x}", + self.nanoseconds_since_epoch(), + self.inc() + ) + } +} + +impl std::str::FromStr for Tuid { + type Err = std::num::ParseIntError; + + fn from_str(s: &str) -> Result { + u128::from_str_radix(s, 16).map(Self::from_u128) } } @@ -52,12 +92,15 @@ impl<'a> From<&'a Tuid> for std::borrow::Cow<'a, Tuid> { impl Tuid { /// All zeroes. - pub const ZERO: Self = Self { time_ns: 0, inc: 0 }; + pub const ZERO: Self = Self { + time_ns: [0; 8], + inc: [0; 8], + }; /// All ones. pub const MAX: Self = Self { - time_ns: u64::MAX, - inc: u64::MAX, + time_ns: u64::MAX.to_le_bytes(), + inc: u64::MAX.to_le_bytes(), }; /// Create a new unique [`Tuid`] based on the current time. @@ -67,24 +110,21 @@ impl Tuid { use std::cell::RefCell; thread_local! { - pub static LATEST_TUID: RefCell = RefCell::new(Tuid{ - time_ns: monotonic_nanos_since_epoch(), + pub static LATEST_TUID: RefCell = RefCell::new(Tuid::from_nanos_and_inc( + monotonic_nanos_since_epoch(), // Leave top bit at zero so we have plenty of room to grow. - inc: random_u64() & !(1_u64 << 63), - }); + random_u64() & !(1_u64 << 63), + )); } LATEST_TUID.with(|latest_tuid| { let mut latest = latest_tuid.borrow_mut(); - let new = Self { - time_ns: monotonic_nanos_since_epoch(), - inc: latest.inc + 1, - }; + let new = Self::from_nanos_and_inc(monotonic_nanos_since_epoch(), latest.inc() + 1); debug_assert!( - latest.time_ns <= new.time_ns, + latest.nanoseconds_since_epoch() <= new.nanoseconds_since_epoch(), "Time should be monotonically increasing" ); @@ -98,20 +138,20 @@ impl Tuid { /// The first should be nano-seconds since epoch. #[inline] pub fn from_nanos_and_inc(time_ns: u64, inc: u64) -> Self { - Self { time_ns, inc } + Self { + time_ns: time_ns.to_le_bytes(), + inc: inc.to_le_bytes(), + } } #[inline] pub fn from_u128(id: u128) -> Self { - Self { - time_ns: (id >> 64) as u64, - inc: (id & (!0 >> 64)) as u64, - } + Self::from_nanos_and_inc((id >> 64) as u64, (id & (!0 >> 64)) as u64) } #[inline] pub fn as_u128(&self) -> u128 { - ((self.time_ns as u128) << 64) | (self.inc as u128) + ((self.nanoseconds_since_epoch() as u128) << 64) | (self.inc() as u128) } /// Approximate nanoseconds since unix epoch. @@ -119,7 +159,7 @@ impl Tuid { /// The upper 64 bits of the [`Tuid`]. #[inline] pub fn nanoseconds_since_epoch(&self) -> u64 { - self.time_ns + u64::from_le_bytes(self.time_ns) } /// The increment part of the [`Tuid`]. @@ -127,7 +167,7 @@ impl Tuid { /// The lower 64 bits of the [`Tuid`]. #[inline] pub fn inc(&self) -> u64 { - self.inc + u64::from_le_bytes(self.inc) } /// Returns the next logical [`Tuid`]. @@ -143,7 +183,7 @@ impl Tuid { Self { time_ns, - inc: inc.wrapping_add(1), + inc: u64::from_le_bytes(inc).wrapping_add(1).to_le_bytes(), } } @@ -160,7 +200,7 @@ impl Tuid { let Self { time_ns, inc } = *self; Self { time_ns, - inc: inc.wrapping_add(n), + inc: u64::from_le_bytes(inc).wrapping_add(n).to_le_bytes(), } } @@ -231,12 +271,40 @@ fn test_tuid() { } let num = 100_000; - let ids: Vec = (0..num).map(|_| Tuid::new()).collect(); + let mut ids = Vec::with_capacity(num); + ids.push(Tuid::ZERO); + ids.push(Tuid::from_nanos_and_inc(123_456, 789_123)); + ids.push(Tuid::from_nanos_and_inc(123_456, u64::MAX)); + ids.extend((0..num - 5).map(|_| Tuid::new())); + ids.push(Tuid::from_nanos_and_inc(u64::MAX, 1)); + ids.push(Tuid::MAX); + assert!(is_sorted(&ids)); assert_eq!(ids.iter().copied().collect::>().len(), num); assert_eq!(ids.iter().copied().collect::>().len(), num); - for id in ids { - assert_eq!(id, Tuid::from_u128(id.as_u128())); + for &tuid in &ids { + assert_eq!(tuid, Tuid::from_u128(tuid.as_u128())); + assert_eq!(tuid, tuid.to_string().parse().unwrap()); } + + let id_strings: Vec = ids.iter().map(|id| id.to_string()).collect(); + assert!( + is_sorted(&id_strings), + "Ids should sort the same when converted to strings" + ); +} + +#[test] +fn test_tuid_size_and_alignment() { + assert_eq!(std::mem::size_of::(), 16); + assert_eq!(std::mem::align_of::(), 1); +} + +#[test] +fn test_tuid_formatting() { + assert_eq!( + Tuid::from_u128(0x182342300c5f8c327a7b4a6e5a379ac4).to_string(), + "182342300C5F8C327a7b4a6e5a379ac4" + ); }