Improve Tuid (#8999)
### Related
* Part of #8992

### What
Improve `re_tuid::Tuid`, and by extension `RowId` and `ChunkId` (which
are just typesafe wrappers around `Tuid`)

### Details
* Implement `from_str` for `Tuid`/`RowId`/`ChunkId` (round-trip sketched below)
* Slightly modify how `Tuid` is formatted as a string (in a backwards/forwards compatible way!)
* Implement `bytemuck` support behind a new opt-in `bytemuck` feature
* Align to bytes (see below)
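
For illustration, here is a rough sketch of the round-trip the new `from_str` enables (hypothetical usage, not code from this diff; `Tuid::new()` is only used here to get a value to print and parse back):

```rust
// Sketch only — not part of this PR.
fn tuid_string_roundtrip() {
    let id = re_tuid::Tuid::new();
    let text = id.to_string(); // e.g. "182342300C5F8C327a7b4a6e5a379ac4"
    let parsed: re_tuid::Tuid = text.parse().unwrap();
    assert!(parsed == id);
}
```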

### Change `Tuid` alignment from `8` to `1`
This will allow us to cast raw bytes into a `&[RowId]`.

`arrow-rs` already aligns all allocations to 64 bytes, so _in theory_ nothing
stops us from doing this already, but the explicit 1-byte alignment is the
"better safe than sorry" approach.

`uuid::Uuid` is also 1-byte aligned, FWIW.

* `Tuid::new` becomes ~7% slower
* `Tuid::cmp` is unaffected
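
For illustration, a sketch of the zero-copy cast that the 1-byte alignment is meant to unlock (hypothetical usage, assuming the new `bytemuck` feature is enabled; `raw` stands in for e.g. an arrow-rs buffer):

```rust
// Sketch only — with `align(1)` and `bytemuck::AnyBitPattern`, a byte buffer can be
// reinterpreted in place; `cast_slice` then only checks that the length is a
// multiple of `size_of::<Tuid>()` (16 bytes).
fn tuids_from_raw(raw: &[u8]) -> &[re_tuid::Tuid] {
    bytemuck::cast_slice(raw)
}
```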

---------

Co-authored-by: Clement Rey <[email protected]>
emilk and teh-cmc authored Feb 12, 2025
1 parent c795a0b commit a4d3725
Showing 7 changed files with 133 additions and 57 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock
@@ -6646,10 +6646,12 @@ dependencies = [
name = "re_tuid"
version = "0.23.0-alpha.1+dev"
dependencies = [
"bytemuck",
"criterion",
"document-features",
"getrandom",
"once_cell",
"rand",
"re_byte_size",
"serde",
"web-time",
@@ -28,7 +28,7 @@ ChunkStore {
│ │ kind: "control"is_sorted: "true"is_sorted: "true"kind: "data"kind: "data" │ │
│ │ ┆ kind: "index"kind: "index" ┆ ┆ │ │
│ ╞══════════════════════════════════╪════════════════════════╪═══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ │
│ │ 0000000067816A6BB4B8C1254D40007B12025-01-10T18:43:42.123456789 ┆ [0, 1, 2] ┆ [0, 1, 2] │ │
│ │ 0000000067816A6Bb4b8c1254d40007b12025-01-10T18:43:42.123456789 ┆ [0, 1, 2] ┆ [0, 1, 2] │ │
│ └──────────────────────────────────┴────────────────────────┴───────────────────────────────┴──────────────────────────────┴──────────────────────────────┘ │
└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
]
9 changes: 4 additions & 5 deletions crates/store/re_sorbet/src/chunk_schema.rs
@@ -216,13 +216,12 @@ impl TryFrom<&ArrowSchema> for ChunkSchema {
let ArrowSchema { metadata, fields } = arrow_schema;

let chunk_id = {
let chunk_id = metadata.get_or_err("rerun.id")?;
let chunk_id = u128::from_str_radix(chunk_id, 16).map_err(|err| {
let chunk_id_str = metadata.get_or_err("rerun.id")?;
chunk_id_str.parse().map_err(|err| {
InvalidChunkSchema::custom(format!(
"Failed to deserialize chunk id {chunk_id:?}: {err}"
"Failed to deserialize chunk id {chunk_id_str:?}: {err}"
))
})?;
ChunkId::from_u128(chunk_id)
})?
};

let entity_path = EntityPath::parse_forgiving(metadata.get_or_err("rerun.entity_path")?);
38 changes: 16 additions & 22 deletions crates/store/re_types_core/src/id.rs
@@ -35,6 +35,14 @@ impl std::fmt::Display for ChunkId {
}
}

impl std::str::FromStr for ChunkId {
type Err = std::num::ParseIntError;

fn from_str(s: &str) -> Result<Self, Self::Err> {
re_tuid::Tuid::from_str(s).map(Self)
}
}

impl ChunkId {
pub const ZERO: Self = Self(re_tuid::Tuid::ZERO);
pub const MAX: Self = Self(re_tuid::Tuid::MAX);
@@ -69,21 +77,10 @@ impl ChunkId {
Self(self.0.incremented_by(n))
}

/// When the `ChunkId` was created, in nanoseconds since unix epoch.
#[inline]
pub fn nanoseconds_since_epoch(&self) -> u64 {
self.0.nanoseconds_since_epoch()
}

#[inline]
pub fn from_u128(id: u128) -> Self {
Self(re_tuid::Tuid::from_u128(id))
}

#[inline]
pub fn as_u128(&self) -> u128 {
self.0.as_u128()
}
}

impl re_byte_size::SizeBytes for ChunkId {
@@ -167,6 +164,14 @@ impl std::fmt::Display for RowId {
}
}

impl std::str::FromStr for RowId {
type Err = std::num::ParseIntError;

fn from_str(s: &str) -> Result<Self, Self::Err> {
re_tuid::Tuid::from_str(s).map(Self)
}
}
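// --- Example sketch (not part of this diff) ---------------------------------
// With the `FromStr` impls above, `RowId` and `ChunkId` now round-trip through
// their string form, which is what lets `chunk_schema.rs` (earlier in this
// commit) replace `u128::from_str_radix` + `ChunkId::from_u128` with a plain
// `.parse()`. Hypothetical usage:
//
//     let row_id = RowId::new();
//     let parsed: RowId = row_id.to_string().parse().unwrap();
//     assert!(parsed == row_id);
// -----------------------------------------------------------------------------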

impl RowId {
pub const ZERO: Self = Self(re_tuid::Tuid::ZERO);
pub const MAX: Self = Self(re_tuid::Tuid::MAX);
@@ -206,21 +211,10 @@ impl RowId {
Self(self.0.incremented_by(n))
}

/// When the `RowId` was created, in nanoseconds since unix epoch.
#[inline]
pub fn nanoseconds_since_epoch(&self) -> u64 {
self.0.nanoseconds_since_epoch()
}

#[inline]
pub fn from_u128(id: u128) -> Self {
Self(re_tuid::Tuid::from_u128(id))
}

#[inline]
pub fn as_u128(&self) -> u128 {
self.0.as_u128()
}
}

impl re_byte_size::SizeBytes for RowId {
5 changes: 5 additions & 0 deletions crates/utils/re_tuid/Cargo.toml
@@ -22,6 +22,9 @@ all-features = true
[features]
default = []

## Enable bytemuck support.
bytemuck = ["dep:bytemuck"]

## Enable (de)serialization using serde.
serde = ["dep:serde"]

@@ -35,10 +38,12 @@ once_cell.workspace = true
web-time.workspace = true

# Optional dependencies
bytemuck = { workspace = true, optional = true }
serde = { workspace = true, features = ["derive"], optional = true }

[dev-dependencies]
criterion.workspace = true
rand = { workspace = true, features = ["std", "std_rng"] }

[lib]
bench = false
8 changes: 8 additions & 0 deletions crates/utils/re_tuid/benches/bench_tuid.rs
@@ -6,6 +6,14 @@ fn bench_tuid(c: &mut Criterion) {
group.bench_function("Tuid::new", |b| {
b.iter(|| criterion::black_box(re_tuid::Tuid::new()));
});

group.throughput(criterion::Throughput::Elements(1_000));
group.bench_function("Tuid::cmp", |b| {
use rand::prelude::*;
let mut ids = (0..2_000).map(|_| re_tuid::Tuid::new()).collect::<Vec<_>>();
ids.shuffle(&mut rand::thread_rng());
b.iter(|| criterion::black_box(ids[0..1_000].cmp(&ids[1_000..2_000])));
});
}

criterion_group!(benches, bench_tuid);
126 changes: 97 additions & 29 deletions crates/utils/re_tuid/src/lib.rs
@@ -6,15 +6,36 @@
#![doc = document_features::document_features!()]
//!
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
/// TUID: Time-based Unique Identifier.
///
/// Time-ordered globally unique 128-bit identifiers.
#[repr(C, align(1))]
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "bytemuck", derive(bytemuck::AnyBitPattern))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct Tuid {
/// Approximate nanoseconds since epoch.
time_ns: u64,
/// A LE u64 encoded as bytes to keep the alignment of `Tuid` to 1.
time_ns: [u8; 8],

/// Initialized to something random on each thread,
/// then incremented for each new [`Tuid`] being allocated.
inc: u64,
/// A LE u64 encoded as bytes to keep the alignment of `Tuid` to 1.
inc: [u8; 8],
}

impl Ord for Tuid {
#[inline]
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.as_u128().cmp(&other.as_u128())
}
}

impl PartialOrd for Tuid {
#[inline]
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
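// --- Example sketch (not part of this diff) ----------------------------------
// Why `Ord`/`PartialOrd` are hand-written now: the fields are little-endian byte
// arrays, and a derived ordering would compare those arrays lexicographically
// (least-significant byte first), which does not match numeric/time order.
// Routing `cmp` through `as_u128()` keeps the old ordering. For instance:
//
//     assert!(256_u64 > 1_u64);
//     assert!(256_u64.to_le_bytes() < 1_u64.to_le_bytes()); // byte-wise order flips
// ------------------------------------------------------------------------------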

impl Tuid {
@@ -24,9 +45,28 @@ impl Tuid {
pub const ARROW_EXTENSION_NAME: &'static str = "rerun.datatypes.TUID";
}

/// Formats the [`Tuid`] as a hex string.
///
/// The format uses upper case for the first 16 hex digits, and lower case for the last 16 hex digits.
/// This is to make it easily distinguished from other hex strings.
///
/// Example: `182342300C5F8C327a7b4a6e5a379ac4`
impl std::fmt::Display for Tuid {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:032X}", self.as_u128())
write!(
f,
"{:016X}{:016x}",
self.nanoseconds_since_epoch(),
self.inc()
)
}
}

impl std::str::FromStr for Tuid {
type Err = std::num::ParseIntError;

fn from_str(s: &str) -> Result<Self, Self::Err> {
u128::from_str_radix(s, 16).map(Self::from_u128)
}
}
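// --- Example sketch (not part of this diff) ----------------------------------
// The "backwards/forwards compatible" claim from the PR description: `from_str`
// is plain hex parsing, so an old all-uppercase string and the new mixed-case
// form of the same id parse to the same value.
//
//     let new_style: Tuid = "182342300C5F8C327a7b4a6e5a379ac4".parse().unwrap();
//     let old_style: Tuid = "182342300C5F8C327A7B4A6E5A379AC4".parse().unwrap();
//     assert!(new_style == old_style);
// ------------------------------------------------------------------------------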

Expand All @@ -52,12 +92,15 @@ impl<'a> From<&'a Tuid> for std::borrow::Cow<'a, Tuid> {

impl Tuid {
/// All zeroes.
pub const ZERO: Self = Self { time_ns: 0, inc: 0 };
pub const ZERO: Self = Self {
time_ns: [0; 8],
inc: [0; 8],
};

/// All ones.
pub const MAX: Self = Self {
time_ns: u64::MAX,
inc: u64::MAX,
time_ns: u64::MAX.to_le_bytes(),
inc: u64::MAX.to_le_bytes(),
};

/// Create a new unique [`Tuid`] based on the current time.
@@ -67,24 +110,21 @@ impl Tuid {
use std::cell::RefCell;

thread_local! {
pub static LATEST_TUID: RefCell<Tuid> = RefCell::new(Tuid{
time_ns: monotonic_nanos_since_epoch(),
pub static LATEST_TUID: RefCell<Tuid> = RefCell::new(Tuid::from_nanos_and_inc(
monotonic_nanos_since_epoch(),

// Leave top bit at zero so we have plenty of room to grow.
inc: random_u64() & !(1_u64 << 63),
});
random_u64() & !(1_u64 << 63),
));
}

LATEST_TUID.with(|latest_tuid| {
let mut latest = latest_tuid.borrow_mut();

let new = Self {
time_ns: monotonic_nanos_since_epoch(),
inc: latest.inc + 1,
};
let new = Self::from_nanos_and_inc(monotonic_nanos_since_epoch(), latest.inc() + 1);

debug_assert!(
latest.time_ns <= new.time_ns,
latest.nanoseconds_since_epoch() <= new.nanoseconds_since_epoch(),
"Time should be monotonically increasing"
);

@@ -98,36 +138,36 @@ impl Tuid {
/// The first should be nano-seconds since epoch.
#[inline]
pub fn from_nanos_and_inc(time_ns: u64, inc: u64) -> Self {
Self { time_ns, inc }
Self {
time_ns: time_ns.to_le_bytes(),
inc: inc.to_le_bytes(),
}
}

#[inline]
pub fn from_u128(id: u128) -> Self {
Self {
time_ns: (id >> 64) as u64,
inc: (id & (!0 >> 64)) as u64,
}
Self::from_nanos_and_inc((id >> 64) as u64, (id & (!0 >> 64)) as u64)
}

#[inline]
pub fn as_u128(&self) -> u128 {
((self.time_ns as u128) << 64) | (self.inc as u128)
((self.nanoseconds_since_epoch() as u128) << 64) | (self.inc() as u128)
}

/// Approximate nanoseconds since unix epoch.
///
/// The upper 64 bits of the [`Tuid`].
#[inline]
pub fn nanoseconds_since_epoch(&self) -> u64 {
self.time_ns
u64::from_le_bytes(self.time_ns)
}

/// The increment part of the [`Tuid`].
///
/// The lower 64 bits of the [`Tuid`].
#[inline]
pub fn inc(&self) -> u64 {
self.inc
u64::from_le_bytes(self.inc)
}

/// Returns the next logical [`Tuid`].
@@ -143,7 +183,7 @@ impl Tuid {

Self {
time_ns,
inc: inc.wrapping_add(1),
inc: u64::from_le_bytes(inc).wrapping_add(1).to_le_bytes(),
}
}

@@ -160,7 +200,7 @@ impl Tuid {
let Self { time_ns, inc } = *self;
Self {
time_ns,
inc: inc.wrapping_add(n),
inc: u64::from_le_bytes(inc).wrapping_add(n).to_le_bytes(),
}
}

@@ -231,12 +271,40 @@ fn test_tuid() {
}

let num = 100_000;
let ids: Vec<Tuid> = (0..num).map(|_| Tuid::new()).collect();
let mut ids = Vec::with_capacity(num);
ids.push(Tuid::ZERO);
ids.push(Tuid::from_nanos_and_inc(123_456, 789_123));
ids.push(Tuid::from_nanos_and_inc(123_456, u64::MAX));
ids.extend((0..num - 5).map(|_| Tuid::new()));
ids.push(Tuid::from_nanos_and_inc(u64::MAX, 1));
ids.push(Tuid::MAX);

assert!(is_sorted(&ids));
assert_eq!(ids.iter().copied().collect::<HashSet::<Tuid>>().len(), num);
assert_eq!(ids.iter().copied().collect::<BTreeSet::<Tuid>>().len(), num);

for id in ids {
assert_eq!(id, Tuid::from_u128(id.as_u128()));
for &tuid in &ids {
assert_eq!(tuid, Tuid::from_u128(tuid.as_u128()));
assert_eq!(tuid, tuid.to_string().parse().unwrap());
}

let id_strings: Vec<String> = ids.iter().map(|id| id.to_string()).collect();
assert!(
is_sorted(&id_strings),
"Ids should sort the same when converted to strings"
);
}

#[test]
fn test_tuid_size_and_alignment() {
assert_eq!(std::mem::size_of::<Tuid>(), 16);
assert_eq!(std::mem::align_of::<Tuid>(), 1);
}

#[test]
fn test_tuid_formatting() {
assert_eq!(
Tuid::from_u128(0x182342300c5f8c327a7b4a6e5a379ac4).to_string(),
"182342300C5F8C327a7b4a6e5a379ac4"
);
}
