From a12ec420d017a9d2466212a0a9153a727fef788b Mon Sep 17 00:00:00 2001 From: Aster Date: Mon, 5 Jun 2023 14:14:02 +0800 Subject: [PATCH] Define the radix tree --- projects/module-path/Cargo.toml | 18 ++ projects/module-path/Readme.md | 0 projects/module-path/package.json | 6 + projects/module-path/src/errors.rs | 6 + projects/module-path/src/lib.rs | 3 + projects/module-path/src/manager/mod.rs | 46 +++++ projects/module-path/src/smart/inlined.rs | 89 ++++++++++ projects/module-path/src/smart/mod.rs | 205 ++++++++++++++++++++++ projects/module-path/src/smart/on_heap.rs | 1 + projects/module-path/tests/main.rs | 35 ++++ projects/module-path/tests/readme.md | 5 + 11 files changed, 414 insertions(+) create mode 100644 projects/module-path/Cargo.toml create mode 100644 projects/module-path/Readme.md create mode 100644 projects/module-path/package.json create mode 100644 projects/module-path/src/errors.rs create mode 100644 projects/module-path/src/lib.rs create mode 100644 projects/module-path/src/manager/mod.rs create mode 100644 projects/module-path/src/smart/inlined.rs create mode 100644 projects/module-path/src/smart/mod.rs create mode 100644 projects/module-path/src/smart/on_heap.rs create mode 100644 projects/module-path/tests/main.rs create mode 100644 projects/module-path/tests/readme.md diff --git a/projects/module-path/Cargo.toml b/projects/module-path/Cargo.toml new file mode 100644 index 0000000..a97d02c --- /dev/null +++ b/projects/module-path/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "module-path" +version = "0.0.0" +authors = ["Aster <192607617@qq.com>"] +description = "very suitable module names, compressed using a prefix tree" +repository = "https://github.com/oovm/sub_projects" +documentation = "https://docs.rs/sub_projects" +readme = "Readme.md" +license = "MPL-2.0" +edition = "2021" + +[dependencies] +radix-tree = "0.2.0" + +[dev-dependencies] + +[features] +default = [] diff --git a/projects/module-path/Readme.md b/projects/module-path/Readme.md new file mode 100644 index 0000000..e69de29 diff --git a/projects/module-path/package.json b/projects/module-path/package.json new file mode 100644 index 0000000..381d4be --- /dev/null +++ b/projects/module-path/package.json @@ -0,0 +1,6 @@ +{ + "private": true, + "scripts": { + "p": "cargo publish --allow-dirty" + } +} diff --git a/projects/module-path/src/errors.rs b/projects/module-path/src/errors.rs new file mode 100644 index 0000000..982023b --- /dev/null +++ b/projects/module-path/src/errors.rs @@ -0,0 +1,6 @@ +#[derive(Debug, Copy, Clone)] +pub enum Error { + UnknownError +} + +pub type Result = std::result::Result; diff --git a/projects/module-path/src/lib.rs b/projects/module-path/src/lib.rs new file mode 100644 index 0000000..4b3a3ad --- /dev/null +++ b/projects/module-path/src/lib.rs @@ -0,0 +1,3 @@ +#![feature(lazy_cell)] + +mod manager; \ No newline at end of file diff --git a/projects/module-path/src/manager/mod.rs b/projects/module-path/src/manager/mod.rs new file mode 100644 index 0000000..46e19f4 --- /dev/null +++ b/projects/module-path/src/manager/mod.rs @@ -0,0 +1,46 @@ +use std::{ + hash::{Hash, Hasher}, + sync::LazyLock, +}; + +use ahash::{AHasher, RandomState}; +use dashmap::DashMap; + +pub static STRING_MANAGER: LazyLock = LazyLock::new(|| StringManager::default()); + +pub type StringID = usize; + +pub struct StringManager { + cache: Radix +} + +impl Default for StringManager { + fn default() -> Self { + let hasher = RandomState::default(); + Self { cache: DashMap::with_hasher(hasher) } + } +} + +impl StringManager { + /// Get a reference to the string + pub fn get(&self, key: StringID) -> Option<&str> { + Some(self.cache.get(&key)?.as_ref()) + } + pub fn get_hash_key(string: &str) -> StringID { + let mut hasher = AHasher::default(); + string.hash(&mut hasher); + hasher.finish() as usize + } + + pub fn insert(&self, value: String) -> StringID { + let hash = Self::get_hash_key(&value); + if self.cache.contains_key(&hash) { + return hash; + } + self.cache.insert(hash, value); + hash + } + pub fn remove(&mut self, key: StringID) -> Option { + self.cache.remove(&key).map(|v| v.1) + } +} diff --git a/projects/module-path/src/smart/inlined.rs b/projects/module-path/src/smart/inlined.rs new file mode 100644 index 0000000..5f07dcb --- /dev/null +++ b/projects/module-path/src/smart/inlined.rs @@ -0,0 +1,89 @@ +use core::mem::size_of; +use std::mem::transmute; + +use crate::SmartString; + +pub const MAX_SIZE: usize = size_of::(); + +pub const LENGTH_MASK: u8 = 0b11000000; + +/// A buffer stored on the stack whose size is equal to the stack size of `String` +#[repr(transparent)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct InlineBuffer([u8; MAX_SIZE]); + +impl InlineBuffer { + /// Construct a new [`InlineString`]. A string that lives in a small buffer on the stack + /// + /// SAFETY: + /// * The caller must guarantee that the length of `text` is less than [`MAX_SIZE`] + #[inline(always)] + pub unsafe fn new(buffer: [u8; MAX_SIZE]) -> Self { + InlineBuffer(buffer) + } + + #[inline] + pub const fn new_const(text: &str) -> Self { + if text.len() > MAX_SIZE { + panic!("Provided string has a length greater than our MAX_SIZE"); + } + + let len = text.len(); + let mut buffer = [0u8; MAX_SIZE]; + + // set the length + buffer[MAX_SIZE - 1] = len as u8 | LENGTH_MASK; + + // Note: for loops aren't allowed in `const fn`, hence the while. + // Note: Iterating forward results in badly optimized code, because the compiler tries to + // unroll the loop. + let text = text.as_bytes(); + let mut i = len; + while i > 0 { + buffer[i - 1] = text[i - 1]; + i -= 1; + } + + InlineBuffer(buffer) + } + + /// Returns an empty [`InlineBuffer`] + #[inline(always)] + pub const fn empty() -> Self { + Self::new_const("") + } + + /// Set's the length of the content for this [`InlineBuffer`] + /// + /// # SAFETY: + /// * The caller must guarantee that `len` bytes in the buffer are valid UTF-8 + #[inline] + pub unsafe fn set_len(&mut self, len: usize) { + debug_assert!(len <= MAX_SIZE); + + // If `length` == MAX_SIZE, then we infer the length to be the capacity of the buffer. We + // can infer this because the way we encode length doesn't overlap with any valid UTF-8 + // bytes + if len < MAX_SIZE { + self.0[MAX_SIZE - 1] = len as u8 | LENGTH_MASK; + } + } + pub fn get_len(&self) -> usize { + let len = self.0[MAX_SIZE - 1] & !LENGTH_MASK; + if len == MAX_SIZE as u8 { MAX_SIZE } else { len as usize } + } + + #[inline(always)] + pub fn copy(&self) -> Self { + InlineBuffer(self.0) + } + + pub unsafe fn as_smart_string(&self) -> SmartString { + transmute::(*self) + } + pub unsafe fn as_str(&self) -> &str { + let len = self.get_len(); + let ptr = self.0.as_ptr() as *const u8; + std::str::from_utf8_unchecked(std::slice::from_raw_parts(ptr, len)) + } +} diff --git a/projects/module-path/src/smart/mod.rs b/projects/module-path/src/smart/mod.rs new file mode 100644 index 0000000..5d53d8e --- /dev/null +++ b/projects/module-path/src/smart/mod.rs @@ -0,0 +1,205 @@ +use std::{ + fmt::{Debug, Formatter, UpperHex}, + mem::transmute, + str::from_utf8_unchecked, +}; + +use compact_str::CompactString; + +use crate::{InlineBuffer, StringManager, MAX_SIZE, STRING_MANAGER}; + +pub mod inlined; +pub mod on_heap; + +/// **Managed**: `Rc`, `Arc` +/// +/// ``` +/// # use smart_string::SmartStringKind; +/// pub struct SmartString { +/// pointer: u64, +/// length: u64, +/// fill: [u8; 7], +/// kind: SmartStringKind, +/// } +/// ``` +#[repr(C)] +#[derive(Copy, Clone, Debug)] +pub struct SmartString { + /// `*const ()` + pointer: usize, + /// length of the string + length: usize, + #[cfg(target_pointer_width = "64")] + fill32: u32, + fill16: u16, + fill8: u8, + kind: u8, +} + +impl UpperHex for SmartString { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + for byte in self.as_bytes() { + write!(f, "{:02X}", byte)?; + } + Ok(()) + } +} + +// impl Debug for SmartString { +// fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { +// f.debug_struct("SmartStringKind").field("kind", &self.kind).finish() +// } +// } + +impl Default for SmartString { + fn default() -> Self { + Self { pointer: 0, length: 0, fill32: 0, fill16: 0, fill8: 0, kind: SmartStringKind::Inlined as u8 } + } +} + +impl SmartString { + #[inline] + pub fn new(str: &str) -> SmartString { + match Self::try_inline(str) { + Some(s) => s, + None => Self::try_managed(str.to_string()), + } + } + /// Create a new managed string + #[inline] + pub fn try_managed(string: String) -> SmartString { + let length = string.len(); + let pointer = STRING_MANAGER.insert(string); + Self { pointer, length, fill32: 0, fill16: 0, fill8: 0, kind: SmartStringKind::Managed as u8 } + } + /// Create a intern string without checking if it already exists + #[inline] + pub unsafe fn managed(string: &str) -> SmartString { + Self { + pointer: StringManager::get_hash_key(&string), + length: string.len(), + fill32: 0, + fill16: 0, + fill8: 0, + kind: SmartStringKind::Managed as u8, + } + } + /// Create a new static string, remove the lifetime + #[inline] + pub unsafe fn static_str(s: &'static str) -> SmartString { + let pointer = s.as_ptr() as usize; + Self { pointer, length: s.len(), fill32: 0, fill16: 0, fill8: 0, kind: SmartStringKind::Static as u8 } + } + #[inline] + pub fn try_inline(s: &str) -> Option { + if s.chars().count() > 24 { + return None; + } + unsafe { Some(Self::inlined(s)) } + } + /// Create a new inlined string + /// 192 bits / (char = 8bits) = 24 chars + #[inline] + pub unsafe fn inlined(s: &str) -> SmartString { + unsafe { InlineBuffer::new_const(s).as_smart_string() } + } + /// Create a new string on the heap + #[inline] + pub fn heap(s: &str) -> SmartString { + let pointer = Box::new(s).as_ptr() as usize; + Self { pointer, length: s.len(), fill32: 0, fill16: 0, fill8: 0, kind: 0 } + } +} + +impl SmartString { + #[inline(always)] + pub const fn kind(&self) -> SmartStringKind { + match self.kind { + 0b00 => SmartStringKind::Inlined, + 0b01 => SmartStringKind::Static, + 0b10 => SmartStringKind::Managed, + 0b11 => SmartStringKind::Heap, + _ => unreachable!(), + } + } + pub fn len(&self) -> usize { + match self.kind() { + SmartStringKind::Inlined => (self.kind & 0b1111_1111) as usize, + SmartStringKind::Static => self.length, + SmartStringKind::Managed => self.length, + SmartStringKind::Heap => self.length, + } + } + pub fn as_str(&self) -> &str { + let s = CompactString::new(""); + match self.kind() { + SmartStringKind::Inlined => todo!(), + SmartStringKind::Static => { + todo!() + } + SmartStringKind::Managed => match STRING_MANAGER.get(self.pointer) { + Some(s) => s.as_ref(), + None => "", + }, + SmartStringKind::Heap => { + todo!() + } + } + } +} + +#[repr(u8)] +#[derive(Copy, Clone, Eq, PartialEq, Hash)] +pub enum SmartStringKind { + /// Inlined Layout + /// ```js + /// xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + /// xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + /// xxxxxxxx xxxxxxxx xxxxxxxx len_mask + 00 + /// ``` + Inlined = 0, + /// Static Layout + /// ```js + /// &'static str + /// len: usize + /// ________ ________ ________ ______01 + /// ``` + Static = 1, + /// Managed Layout + /// ```js + /// u64 + /// usize + /// ________ ________ ________ ______10 + /// ``` + Managed = 2, + /// Heap Layout + /// ```js + /// str + /// usize + /// box ________ ________ ______11 + /// ``` + Heap = 3, +} + +impl From<&'static str> for SmartString { + fn from(s: &'static str) -> Self { + unsafe { SmartString::static_str(s) } + } +} + +impl SmartString { + pub fn as_static(&self) -> Option<&'static str> { + match self.kind() { + SmartStringKind::Static => { + todo!() + } + _ => None, + } + } + pub fn as_managed(&self) -> Option<&str> { + todo!() + } + pub fn as_bytes(self) -> [u8; MAX_SIZE] { + unsafe { transmute(self) } + } +} diff --git a/projects/module-path/src/smart/on_heap.rs b/projects/module-path/src/smart/on_heap.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/projects/module-path/src/smart/on_heap.rs @@ -0,0 +1 @@ + diff --git a/projects/module-path/tests/main.rs b/projects/module-path/tests/main.rs new file mode 100644 index 0000000..6d5f958 --- /dev/null +++ b/projects/module-path/tests/main.rs @@ -0,0 +1,35 @@ +use std::mem::size_of; + +use smart_string::SmartString; + +#[test] +fn ready() { + println!("it works!") +} + +#[test] +fn keep_size_of() { + assert_eq!(size_of::(), size_of::()) +} + +#[test] +fn test() { + let m1 = SmartString::try_managed("managed".to_string()); + let m2 = SmartString::try_managed("managed".to_string()); + println!("{:?}", m1); + println!("{:?}", m2); + + unsafe { + let i1 = SmartString::inlined("inlined string"); + let i2 = SmartString::inlined("inlined string"); + println!("{:?}", i1); + println!("{:?}", i2); + } + + unsafe { + let h1 = SmartString::heap("heap string"); + let h2 = SmartString::heap("heap string"); + println!("{:?}", h1); + println!("{:?}", h2); + } +} diff --git a/projects/module-path/tests/readme.md b/projects/module-path/tests/readme.md new file mode 100644 index 0000000..649841b --- /dev/null +++ b/projects/module-path/tests/readme.md @@ -0,0 +1,5 @@ +## Tests + +```bash +wee test +```