diff --git a/projects/smart-string/src/manager/mod.rs b/projects/smart-string/src/manager/mod.rs index a5fbbf1..da17d35 100644 --- a/projects/smart-string/src/manager/mod.rs +++ b/projects/smart-string/src/manager/mod.rs @@ -1,9 +1,10 @@ -use std::hash::{Hash, Hasher}; -use std::sync::LazyLock; +use std::{ + hash::{Hash, Hasher}, + sync::LazyLock, +}; use ahash::{AHasher, RandomState}; use dashmap::DashMap; -use dashmap::mapref::one::Ref; pub static STRING_MANAGER: LazyLock = LazyLock::new(|| StringManager::default()); @@ -16,28 +17,30 @@ pub struct StringManager { impl Default for StringManager { fn default() -> Self { let hasher = RandomState::default(); - Self { - cache: DashMap::with_hasher(hasher), - } + Self { cache: DashMap::with_hasher(hasher) } } } impl StringManager { - pub fn get(&self, key: StringID) -> Option> { - self.cache.get(&key) + /// Get a reference to the string + pub fn get(&self, key: StringID) -> Option<&str> { + Some(self.cache.get(&key)?.as_ref()) } - pub fn insert(&self, value: S) -> StringID where S: Into { + pub fn get_hash_key(string: &str) -> StringID { let mut hasher = AHasher::default(); - let s = value.into(); - s.hash(&mut hasher); - let hash = hasher.finish() as usize; + string.hash(&mut hasher); + hasher.finish() as usize + } + + pub fn insert(&self, value: String) -> StringID { + let hash = Self::get_hash_key(&value); if self.cache.contains_key(&hash) { return hash; } - self.cache.insert(hash, s); + self.cache.insert(hash, value); hash } pub fn remove(&mut self, key: StringID) -> Option { self.cache.remove(&key).map(|v| v.1) } -} \ No newline at end of file +} diff --git a/projects/smart-string/src/smart/inlined.rs b/projects/smart-string/src/smart/inlined.rs index db1afef..5f07dcb 100644 --- a/projects/smart-string/src/smart/inlined.rs +++ b/projects/smart-string/src/smart/inlined.rs @@ -1,4 +1,4 @@ -use core::{mem::size_of, ptr::copy_nonoverlapping}; +use core::mem::size_of; use std::mem::transmute; use crate::SmartString; @@ -9,6 +9,7 @@ pub const LENGTH_MASK: u8 = 0b11000000; /// A buffer stored on the stack whose size is equal to the stack size of `String` #[repr(transparent)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct InlineBuffer([u8; MAX_SIZE]); impl InlineBuffer { @@ -16,29 +17,8 @@ impl InlineBuffer { /// /// SAFETY: /// * The caller must guarantee that the length of `text` is less than [`MAX_SIZE`] - #[inline] - pub unsafe fn new(text: &str) -> Self { - debug_assert!(text.len() <= MAX_SIZE); - - let len = text.len(); - let mut buffer = [0u8; MAX_SIZE]; - - // set the length in the last byte - buffer[MAX_SIZE - 1] = len as u8 | LENGTH_MASK; - - // copy the string into our buffer - // - // note: in the case where len == MAX_SIZE, we'll overwrite the len, but that's okay because - // when reading the length we can detect that the last byte is part of UTF-8 and return a - // length of MAX_SIZE - // - // SAFETY: - // * src (`text`) is valid for `len` bytes because `len` comes from `text` - // * dst (`buffer`) is valid for `len` bytes because we assert src is less than MAX_SIZE - // * src and dst don't overlap because we created dst - // - copy_nonoverlapping(text.as_ptr(), buffer.as_mut_ptr(), len); - + #[inline(always)] + pub unsafe fn new(buffer: [u8; MAX_SIZE]) -> Self { InlineBuffer(buffer) } @@ -88,14 +68,22 @@ impl InlineBuffer { self.0[MAX_SIZE - 1] = len as u8 | LENGTH_MASK; } } + pub fn get_len(&self) -> usize { + let len = self.0[MAX_SIZE - 1] & !LENGTH_MASK; + if len == MAX_SIZE as u8 { MAX_SIZE } else { len as usize } + } #[inline(always)] pub fn copy(&self) -> Self { InlineBuffer(self.0) } - #[allow(clippy::wrong_self_convention)] - pub unsafe fn as_smart_string(self) -> SmartString { - transmute::(self) + pub unsafe fn as_smart_string(&self) -> SmartString { + transmute::(*self) + } + pub unsafe fn as_str(&self) -> &str { + let len = self.get_len(); + let ptr = self.0.as_ptr() as *const u8; + std::str::from_utf8_unchecked(std::slice::from_raw_parts(ptr, len)) } } diff --git a/projects/smart-string/src/smart/mod.rs b/projects/smart-string/src/smart/mod.rs index 8072aa1..5d53d8e 100644 --- a/projects/smart-string/src/smart/mod.rs +++ b/projects/smart-string/src/smart/mod.rs @@ -1,9 +1,15 @@ -use std::fmt::{Debug, Formatter}; +use std::{ + fmt::{Debug, Formatter, UpperHex}, + mem::transmute, + str::from_utf8_unchecked, +}; + use compact_str::CompactString; -pub mod inlined; +use crate::{InlineBuffer, StringManager, MAX_SIZE, STRING_MANAGER}; -use crate::STRING_MANAGER; +pub mod inlined; +pub mod on_heap; /// **Managed**: `Rc`, `Arc` /// @@ -12,12 +18,12 @@ use crate::STRING_MANAGER; /// pub struct SmartString { /// pointer: u64, /// length: u64, -/// extra: [u8; 7], +/// fill: [u8; 7], /// kind: SmartStringKind, /// } /// ``` #[repr(C)] -#[derive(Debug)] +#[derive(Copy, Clone, Debug)] pub struct SmartString { /// `*const ()` pointer: usize, @@ -30,62 +36,78 @@ pub struct SmartString { kind: u8, } -impl Debug for SmartStringKind { +impl UpperHex for SmartString { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("SmartStringKind") - .field("kind", &self as &u8) - .finish() + for byte in self.as_bytes() { + write!(f, "{:02X}", byte)?; + } + Ok(()) } } +// impl Debug for SmartString { +// fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { +// f.debug_struct("SmartStringKind").field("kind", &self.kind).finish() +// } +// } + impl Default for SmartString { fn default() -> Self { - Self { - pointer: 0, - length: 0, - fill32: 0, - fill16: 0, - fill8: 0, - kind: SmartStringKind::Inlined as u8, - } + Self { pointer: 0, length: 0, fill32: 0, fill16: 0, fill8: 0, kind: SmartStringKind::Inlined as u8 } } } - impl SmartString { #[inline] - pub fn new(s: &str) -> Self { - todo!() + pub fn new(str: &str) -> SmartString { + match Self::try_inline(str) { + Some(s) => s, + None => Self::try_managed(str.to_string()), + } + } + /// Create a new managed string + #[inline] + pub fn try_managed(string: String) -> SmartString { + let length = string.len(); + let pointer = STRING_MANAGER.insert(string); + Self { pointer, length, fill32: 0, fill16: 0, fill8: 0, kind: SmartStringKind::Managed as u8 } } + /// Create a intern string without checking if it already exists #[inline] - pub fn managed(s: &str) -> SmartString { - let id = STRING_MANAGER.insert(s); + pub unsafe fn managed(string: &str) -> SmartString { Self { - pointer: id, - length: s.len(), + pointer: StringManager::get_hash_key(&string), + length: string.len(), fill32: 0, fill16: 0, fill8: 0, kind: SmartStringKind::Managed as u8, } } - // 192 bits / (char = 8bits) = 24 chars - pub fn inlined(s: &str) -> Option { - if s.as_bytes().contains(&0) { - return None; - } + /// Create a new static string, remove the lifetime + #[inline] + pub unsafe fn static_str(s: &'static str) -> SmartString { + let pointer = s.as_ptr() as usize; + Self { pointer, length: s.len(), fill32: 0, fill16: 0, fill8: 0, kind: SmartStringKind::Static as u8 } + } + #[inline] + pub fn try_inline(s: &str) -> Option { if s.chars().count() > 24 { return None; } - if s.len() > 8 { - return None; - } - CompactString::new_inline() - let inline = InlineBuffer::new_const(text); - Repr::from_inline(inline) + unsafe { Some(Self::inlined(s)) } } + /// Create a new inlined string + /// 192 bits / (char = 8bits) = 24 chars + #[inline] + pub unsafe fn inlined(s: &str) -> SmartString { + unsafe { InlineBuffer::new_const(s).as_smart_string() } + } + /// Create a new string on the heap + #[inline] pub fn heap(s: &str) -> SmartString { - todo!() + let pointer = Box::new(s).as_ptr() as usize; + Self { pointer, length: s.len(), fill32: 0, fill16: 0, fill8: 0, kind: 0 } } } @@ -102,15 +124,23 @@ impl SmartString { } pub fn len(&self) -> usize { match self.kind() { - SmartStringKind::Inlined => { - todo!() - } + SmartStringKind::Inlined => (self.kind & 0b1111_1111) as usize, + SmartStringKind::Static => self.length, + SmartStringKind::Managed => self.length, + SmartStringKind::Heap => self.length, + } + } + pub fn as_str(&self) -> &str { + let s = CompactString::new(""); + match self.kind() { + SmartStringKind::Inlined => todo!(), SmartStringKind::Static => { todo!() } - SmartStringKind::Managed => { - todo!() - } + SmartStringKind::Managed => match STRING_MANAGER.get(self.pointer) { + Some(s) => s.as_ref(), + None => "", + }, SmartStringKind::Heap => { todo!() } @@ -118,55 +148,58 @@ impl SmartString { } } -#[test] -fn test() { - let s1 = SmartString::managed("a"); - let s2 = SmartString::managed("a"); - println!("{:?}", s1); - println!("{:?}", s2); -} - #[repr(u8)] -#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] +#[derive(Copy, Clone, Eq, PartialEq, Hash)] pub enum SmartStringKind { /// Inlined Layout /// ```js /// xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx /// xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - /// xxxxxxxx xxxxxxxx xxxxxxxx xxxxxx00 + /// xxxxxxxx xxxxxxxx xxxxxxxx len_mask + 00 /// ``` Inlined = 0, /// Static Layout /// ```js /// &'static str + /// len: usize /// ________ ________ ________ ______01 /// ``` Static = 1, /// Managed Layout /// ```js /// u64 + /// usize /// ________ ________ ________ ______10 /// ``` Managed = 2, /// Heap Layout /// ```js - /// box /// str + /// usize + /// box ________ ________ ______11 /// ``` Heap = 3, } impl From<&'static str> for SmartString { fn from(s: &'static str) -> Self { - todo!() + unsafe { SmartString::static_str(s) } } } impl SmartString { - pub unsafe fn as_static(&self) -> Option<&'static str> { - todo!() + pub fn as_static(&self) -> Option<&'static str> { + match self.kind() { + SmartStringKind::Static => { + todo!() + } + _ => None, + } } pub fn as_managed(&self) -> Option<&str> { todo!() } + pub fn as_bytes(self) -> [u8; MAX_SIZE] { + unsafe { transmute(self) } + } } diff --git a/projects/smart-string/src/smart/on_heap.rs b/projects/smart-string/src/smart/on_heap.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/projects/smart-string/src/smart/on_heap.rs @@ -0,0 +1 @@ + diff --git a/projects/smart-string/tests/main.rs b/projects/smart-string/tests/main.rs index f3ffc75..6d5f958 100644 --- a/projects/smart-string/tests/main.rs +++ b/projects/smart-string/tests/main.rs @@ -7,8 +7,29 @@ fn ready() { println!("it works!") } - #[test] fn keep_size_of() { assert_eq!(size_of::(), size_of::()) -} \ No newline at end of file +} + +#[test] +fn test() { + let m1 = SmartString::try_managed("managed".to_string()); + let m2 = SmartString::try_managed("managed".to_string()); + println!("{:?}", m1); + println!("{:?}", m2); + + unsafe { + let i1 = SmartString::inlined("inlined string"); + let i2 = SmartString::inlined("inlined string"); + println!("{:?}", i1); + println!("{:?}", i2); + } + + unsafe { + let h1 = SmartString::heap("heap string"); + let h2 = SmartString::heap("heap string"); + println!("{:?}", h1); + println!("{:?}", h2); + } +}