Skip to content

Commit

Permalink
Fix heap string
Browse files Browse the repository at this point in the history
  • Loading branch information
oovm committed Feb 28, 2023
1 parent e7a3895 commit d49dd92
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 99 deletions.
31 changes: 17 additions & 14 deletions projects/smart-string/src/manager/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use std::hash::{Hash, Hasher};
use std::sync::LazyLock;
use std::{
hash::{Hash, Hasher},
sync::LazyLock,
};

use ahash::{AHasher, RandomState};
use dashmap::DashMap;
use dashmap::mapref::one::Ref;

/// Process-wide string manager, built on first access through `StringManager::default`.
pub static STRING_MANAGER: LazyLock<StringManager> = LazyLock::new(StringManager::default);

Expand All @@ -16,28 +17,30 @@ pub struct StringManager {
impl Default for StringManager {
fn default() -> Self {
let hasher = RandomState::default();
Self {
cache: DashMap::with_hasher(hasher),
}
Self { cache: DashMap::with_hasher(hasher) }
}
}

impl StringManager {
pub fn get(&self, key: StringID) -> Option<Ref<StringID, String, RandomState>> {
self.cache.get(&key)
/// Get a reference to the string
pub fn get(&self, key: StringID) -> Option<&str> {
Some(self.cache.get(&key)?.as_ref())
}
pub fn insert<S>(&self, value: S) -> StringID where S: Into<String> {
pub fn get_hash_key(string: &str) -> StringID {
let mut hasher = AHasher::default();
let s = value.into();
s.hash(&mut hasher);
let hash = hasher.finish() as usize;
string.hash(&mut hasher);
hasher.finish() as usize
}

pub fn insert(&self, value: String) -> StringID {
let hash = Self::get_hash_key(&value);
if self.cache.contains_key(&hash) {
return hash;
}
self.cache.insert(hash, s);
self.cache.insert(hash, value);
hash
}
/// Remove the string stored under `key` and return it, or `None` when the
/// cache holds no entry for that id.
///
/// The receiver is `&self` rather than `&mut self`: `DashMap::remove` only
/// needs shared access, and the global `STRING_MANAGER` static can only hand
/// out shared references, so a `&mut self` method could never be called on it.
pub fn remove(&self, key: StringID) -> Option<String> {
    // `DashMap::remove` yields the `(key, value)` pair; only the value is returned.
    self.cache.remove(&key).map(|(_, value)| value)
}
}
}
42 changes: 15 additions & 27 deletions projects/smart-string/src/smart/inlined.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use core::{mem::size_of, ptr::copy_nonoverlapping};
use core::mem::size_of;
use std::mem::transmute;

use crate::SmartString;
Expand All @@ -9,36 +9,16 @@ pub const LENGTH_MASK: u8 = 0b11000000;

/// A buffer stored on the stack whose size is equal to the stack size of `String`
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct InlineBuffer([u8; MAX_SIZE]);

impl InlineBuffer {
/// Construct a new [`InlineString`]. A string that lives in a small buffer on the stack
///
/// SAFETY:
/// * The caller must guarantee that the length of `text` is less than [`MAX_SIZE`]
#[inline]
pub unsafe fn new(text: &str) -> Self {
debug_assert!(text.len() <= MAX_SIZE);

let len = text.len();
let mut buffer = [0u8; MAX_SIZE];

// set the length in the last byte
buffer[MAX_SIZE - 1] = len as u8 | LENGTH_MASK;

// copy the string into our buffer
//
// note: in the case where len == MAX_SIZE, we'll overwrite the len, but that's okay because
// when reading the length we can detect that the last byte is part of UTF-8 and return a
// length of MAX_SIZE
//
// SAFETY:
// * src (`text`) is valid for `len` bytes because `len` comes from `text`
// * dst (`buffer`) is valid for `len` bytes because we assert src is less than MAX_SIZE
// * src and dst don't overlap because we created dst
//
copy_nonoverlapping(text.as_ptr(), buffer.as_mut_ptr(), len);

#[inline(always)]
pub unsafe fn new(buffer: [u8; MAX_SIZE]) -> Self {
InlineBuffer(buffer)
}

Expand Down Expand Up @@ -88,14 +68,22 @@ impl InlineBuffer {
self.0[MAX_SIZE - 1] = len as u8 | LENGTH_MASK;
}
}
/// Read the length recorded in the final byte of the buffer.
///
/// The bits covered by `LENGTH_MASK` are cleared first. A remaining value equal
/// to `MAX_SIZE as u8` is reported as `MAX_SIZE`; any other value is returned
/// as-is.
pub fn get_len(&self) -> usize {
    let stored = self.0[MAX_SIZE - 1] & !LENGTH_MASK;
    match stored {
        full if full == MAX_SIZE as u8 => MAX_SIZE,
        other => other as usize,
    }
}

#[inline(always)]
pub fn copy(&self) -> Self {
InlineBuffer(self.0)
}

#[allow(clippy::wrong_self_convention)]
pub unsafe fn as_smart_string(self) -> SmartString {
transmute::<Self, SmartString>(self)
pub unsafe fn as_smart_string(&self) -> SmartString {
transmute::<Self, SmartString>(*self)
}
/// View the occupied prefix of the buffer as a string slice.
///
/// The prefix length comes from [`get_len`](Self::get_len).
///
/// # Safety
///
/// The first `self.get_len()` bytes of the buffer must be valid UTF-8; no
/// validation is performed.
///
/// # Panics
///
/// Panics if the recorded length exceeds the buffer size.
pub unsafe fn as_str(&self) -> &str {
    let len = self.get_len();
    // Bounds-checked slice: a corrupt length byte panics here instead of
    // reading past the end of the buffer through a raw pointer.
    let bytes = &self.0[..len];
    // SAFETY: the caller guarantees the occupied prefix is valid UTF-8.
    unsafe { std::str::from_utf8_unchecked(bytes) }
}
}
145 changes: 89 additions & 56 deletions projects/smart-string/src/smart/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
use std::fmt::{Debug, Formatter};
use std::{
fmt::{Debug, Formatter, UpperHex},
mem::transmute,
str::from_utf8_unchecked,
};

use compact_str::CompactString;

pub mod inlined;
use crate::{InlineBuffer, StringManager, MAX_SIZE, STRING_MANAGER};

use crate::STRING_MANAGER;
pub mod inlined;
pub mod on_heap;

/// **Managed**: `Rc<String>`, `Arc<String>`
///
Expand All @@ -12,12 +18,12 @@ use crate::STRING_MANAGER;
/// pub struct SmartString {
/// pointer: u64,
/// length: u64,
/// extra: [u8; 7],
/// fill: [u8; 7],
/// kind: SmartStringKind,
/// }
/// ```
#[repr(C)]
#[derive(Debug)]
#[derive(Copy, Clone, Debug)]
pub struct SmartString {
/// `*const ()`
pointer: usize,
Expand All @@ -30,62 +36,78 @@ pub struct SmartString {
kind: u8,
}

impl Debug for SmartStringKind {
impl UpperHex for SmartString {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("SmartStringKind")
.field("kind", &self as &u8)
.finish()
for byte in self.as_bytes() {
write!(f, "{:02X}", byte)?;
}
Ok(())
}
}

// impl Debug for SmartString {
// fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
// f.debug_struct("SmartStringKind").field("kind", &self.kind).finish()
// }
// }

impl Default for SmartString {
fn default() -> Self {
Self {
pointer: 0,
length: 0,
fill32: 0,
fill16: 0,
fill8: 0,
kind: SmartStringKind::Inlined as u8,
}
Self { pointer: 0, length: 0, fill32: 0, fill16: 0, fill8: 0, kind: SmartStringKind::Inlined as u8 }
}
}


impl SmartString {
#[inline]
pub fn new(s: &str) -> Self {
todo!()
pub fn new(str: &str) -> SmartString {
match Self::try_inline(str) {
Some(s) => s,
None => Self::try_managed(str.to_string()),
}
}
/// Create a new managed string
#[inline]
pub fn try_managed(string: String) -> SmartString {
let length = string.len();
let pointer = STRING_MANAGER.insert(string);
Self { pointer, length, fill32: 0, fill16: 0, fill8: 0, kind: SmartStringKind::Managed as u8 }
}
/// Create an interned string without checking if it already exists
#[inline]
pub fn managed(s: &str) -> SmartString {
let id = STRING_MANAGER.insert(s);
pub unsafe fn managed(string: &str) -> SmartString {
Self {
pointer: id,
length: s.len(),
pointer: StringManager::get_hash_key(&string),
length: string.len(),
fill32: 0,
fill16: 0,
fill8: 0,
kind: SmartStringKind::Managed as u8,
}
}
// 192 bits / (char = 8bits) = 24 chars
pub fn inlined(s: &str) -> Option<SmartString> {
if s.as_bytes().contains(&0) {
return None;
}
/// Create a new static string, remove the lifetime
#[inline]
pub unsafe fn static_str(s: &'static str) -> SmartString {
let pointer = s.as_ptr() as usize;
Self { pointer, length: s.len(), fill32: 0, fill16: 0, fill8: 0, kind: SmartStringKind::Static as u8 }
}
#[inline]
pub fn try_inline(s: &str) -> Option<SmartString> {
if s.chars().count() > 24 {
return None;
}
if s.len() > 8 {
return None;
}
CompactString::new_inline()
let inline = InlineBuffer::new_const(text);
Repr::from_inline(inline)
unsafe { Some(Self::inlined(s)) }
}
/// Create a new inlined string
///
/// Builds an `InlineBuffer` from `s` with `InlineBuffer::new_const` and turns
/// it into a `SmartString` with `as_smart_string`.
///
/// 192 bits / (char = 8bits) = 24 chars
///
/// # Safety
///
/// NOTE(review): `InlineBuffer::new_const` is not visible from here; presumably
/// `s` must fit in the inline buffer — confirm the exact limit with its definition.
#[inline]
pub unsafe fn inlined(s: &str) -> SmartString {
unsafe { InlineBuffer::new_const(s).as_smart_string() }
}
/// Create a new string on the heap
#[inline]
pub fn heap(s: &str) -> SmartString {
todo!()
let pointer = Box::new(s).as_ptr() as usize;
Self { pointer, length: s.len(), fill32: 0, fill16: 0, fill8: 0, kind: 0 }
}
}

Expand All @@ -102,71 +124,82 @@ impl SmartString {
}
pub fn len(&self) -> usize {
match self.kind() {
SmartStringKind::Inlined => {
todo!()
}
SmartStringKind::Inlined => (self.kind & 0b1111_1111) as usize,
SmartStringKind::Static => self.length,
SmartStringKind::Managed => self.length,
SmartStringKind::Heap => self.length,
}
}
pub fn as_str(&self) -> &str {
let s = CompactString::new("");
match self.kind() {
SmartStringKind::Inlined => todo!(),
SmartStringKind::Static => {
todo!()
}
SmartStringKind::Managed => {
todo!()
}
SmartStringKind::Managed => match STRING_MANAGER.get(self.pointer) {
Some(s) => s.as_ref(),
None => "",
},
SmartStringKind::Heap => {
todo!()
}
}
}
}

// Smoke test: builds two managed strings from the same text and prints their
// `Debug` output. It makes no assertions, so it only fails if construction or
// formatting panics.
#[test]
fn test() {
let s1 = SmartString::managed("a");
let s2 = SmartString::managed("a");
println!("{:?}", s1);
println!("{:?}", s2);
}

#[repr(u8)]
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
pub enum SmartStringKind {
/// Inlined Layout
/// ```js
/// xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
/// xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
/// xxxxxxxx xxxxxxxx xxxxxxxx xxxxxx00
/// xxxxxxxx xxxxxxxx xxxxxxxx len_mask + 00
/// ```
Inlined = 0,
/// Static Layout
/// ```js
/// &'static str
/// len: usize
/// ________ ________ ________ ______01
/// ```
Static = 1,
/// Managed Layout
/// ```js
/// u64
/// usize
/// ________ ________ ________ ______10
/// ```
Managed = 2,
/// Heap Layout
/// ```js
/// box
/// str
/// usize
/// box ________ ________ ______11
/// ```
Heap = 3,
}

impl From<&'static str> for SmartString {
fn from(s: &'static str) -> Self {
todo!()
unsafe { SmartString::static_str(s) }
}
}

impl SmartString {
pub unsafe fn as_static(&self) -> Option<&'static str> {
todo!()
pub fn as_static(&self) -> Option<&'static str> {
match self.kind() {
SmartStringKind::Static => {
todo!()
}
_ => None,
}
}
/// Placeholder: the body is `todo!()`, so every call currently panics.
///
/// NOTE(review): presumably meant to yield the string content when the kind is
/// `Managed` and `None` otherwise — confirm when implementing.
pub fn as_managed(&self) -> Option<&str> {
todo!()
}
pub fn as_bytes(self) -> [u8; MAX_SIZE] {
unsafe { transmute(self) }
}
}
1 change: 1 addition & 0 deletions projects/smart-string/src/smart/on_heap.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

Loading

0 comments on commit d49dd92

Please sign in to comment.