Skip to content

Commit

Permalink
Define the radix tree
Browse files Browse the repository at this point in the history
  • Loading branch information
oovm committed Jun 5, 2023
1 parent d49dd92 commit a12ec42
Show file tree
Hide file tree
Showing 11 changed files with 414 additions and 0 deletions.
18 changes: 18 additions & 0 deletions projects/module-path/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[package]
name = "module-path"
version = "0.0.0"
authors = ["Aster <[email protected]>"]
description = "very suitable module names, compressed using a prefix tree"
repository = "https://github.com/oovm/sub_projects"
documentation = "https://docs.rs/sub_projects"
readme = "Readme.md"
license = "MPL-2.0"
edition = "2021"

[dependencies]
radix-tree = "0.2.0"

[dev-dependencies]

[features]
default = []
Empty file added projects/module-path/Readme.md
Empty file.
6 changes: 6 additions & 0 deletions projects/module-path/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"private": true,
"scripts": {
"p": "cargo publish --allow-dirty"
}
}
6 changes: 6 additions & 0 deletions projects/module-path/src/errors.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/// Errors produced by this crate.
///
/// Only a catch-all variant exists so far.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Error {
    /// A failure whose cause has not been classified.
    UnknownError,
}

impl std::fmt::Display for Error {
    /// Writes a short, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::UnknownError => f.write_str("unknown error"),
        }
    }
}

// Implementing the standard trait lets callers use `?` into `Box<dyn Error>` and friends.
impl std::error::Error for Error {}

/// Crate-wide result alias whose error type is fixed to [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
3 changes: 3 additions & 0 deletions projects/module-path/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
//! Crate root of `module-path`.
//!
//! NOTE(review): the other source files in this commit import `crate::SmartString`,
//! `crate::InlineBuffer`, `crate::StringManager`, `crate::MAX_SIZE` and
//! `crate::STRING_MANAGER`, but only the private `manager` module is declared below and
//! nothing is re-exported — confirm the missing module declarations / re-exports.

// Unstable-feature gate for the `std::sync::LazyLock` imported by the `manager` module.
// NOTE(review): `LazyLock` has since been stabilised (Rust 1.80); presumably this gate can
// be dropped on current toolchains — confirm the minimum supported compiler first.
#![feature(lazy_cell)]

// String store backing the global `STRING_MANAGER`.
mod manager;
46 changes: 46 additions & 0 deletions projects/module-path/src/manager/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
use std::{
hash::{Hash, Hasher},
sync::LazyLock,
};

use ahash::{AHasher, RandomState};
use dashmap::DashMap;

/// Identifier handed out by [`StringManager`]: the string's hash key as a `usize`.
pub type StringID = usize;

/// Global [`StringManager`], constructed through `StringManager::default` on first access.
pub static STRING_MANAGER: LazyLock<StringManager> = LazyLock::new(StringManager::default);

/// String store keyed by [`StringID`]; the global instance is `STRING_MANAGER`.
///
/// NOTE(review): the field is declared as `Radix`, a type that is neither imported nor
/// defined in this file, while `Default` builds a `DashMap` with an ahash `RandomState`
/// and the methods call `get` / `contains_key` / `insert` / `remove` on it. Presumably
/// this is a half-finished move to a radix tree — confirm the intended field type.
pub struct StringManager {
// Map from a string's hash key to the stored string (filled by `insert`).
cache: Radix
}

impl Default for StringManager {
    /// Creates an empty manager whose map is built with a default ahash `RandomState`.
    fn default() -> Self {
        Self { cache: DashMap::with_hasher(RandomState::default()) }
    }
}

impl StringManager {
    /// Looks up the string stored under `key`.
    ///
    /// Returns `None` when nothing is stored under that id.
    ///
    /// NOTE(review): if `cache` is a `DashMap`, its `get` hands back a guard, and a `&str`
    /// borrowed from that guard presumably cannot outlive this call — confirm this
    /// signature still works once the field type is settled.
    pub fn get(&self, key: StringID) -> Option<&str> {
        Some(self.cache.get(&key)?.as_ref())
    }

    /// Computes the id under which `string` is (or would be) stored.
    ///
    /// The string is fed to a default-constructed `AHasher`; the 64-bit digest is cast to
    /// `usize`, which drops the upper bits on targets where `usize` is narrower.
    pub fn get_hash_key(string: &str) -> StringID {
        let mut hasher = AHasher::default();
        string.hash(&mut hasher);
        hasher.finish() as usize
    }

    /// Stores `value` under its hash key unless that key is already present, and returns
    /// the key.
    ///
    /// An existing entry is never overwritten, so two different strings whose hash keys
    /// collide end up sharing the id (and the text) of whichever was stored first.
    pub fn insert(&self, value: String) -> StringID {
        let hash = Self::get_hash_key(&value);
        // One map operation instead of a separate `contains_key` + `insert` pair.
        self.cache.entry(hash).or_insert(value);
        hash
    }

    /// Removes the entry stored under `key` and returns its string, if there was one.
    ///
    /// Takes `&self` like the other methods: the only instance in this file is the
    /// immutable `STRING_MANAGER` static, on which a `&mut self` method could never be
    /// called.
    pub fn remove(&self, key: StringID) -> Option<String> {
        self.cache.remove(&key).map(|(_, string)| string)
    }
}
89 changes: 89 additions & 0 deletions projects/module-path/src/smart/inlined.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
use core::mem::size_of;
use std::mem::transmute;

use crate::SmartString;

/// Capacity of an [`InlineBuffer`] in bytes: the in-memory size of a `String` value.
pub const MAX_SIZE: usize = size_of::<String>();

/// Marker OR-ed into the last buffer byte together with the length (both top bits set).
pub const LENGTH_MASK: u8 = 0b1100_0000;

/// A fixed-size byte buffer for short strings, exactly [`MAX_SIZE`] bytes long — the same
/// size as a `String` value.
///
/// The last byte doubles as the length slot: `new_const` and `set_len` store
/// `len | LENGTH_MASK` there whenever the text is shorter than the buffer.
///
/// The derived comparison and hashing traits operate on all `MAX_SIZE` bytes, not only on
/// the bytes covered by the encoded length.
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct InlineBuffer([u8; MAX_SIZE]);

impl InlineBuffer {
/// Construct a new [`InlineString`]. A string that lives in a small buffer on the stack
///
/// SAFETY:
/// * The caller must guarantee that the length of `text` is less than [`MAX_SIZE`]
#[inline(always)]
pub unsafe fn new(buffer: [u8; MAX_SIZE]) -> Self {
InlineBuffer(buffer)
}

#[inline]
pub const fn new_const(text: &str) -> Self {
if text.len() > MAX_SIZE {
panic!("Provided string has a length greater than our MAX_SIZE");
}

let len = text.len();
let mut buffer = [0u8; MAX_SIZE];

// set the length
buffer[MAX_SIZE - 1] = len as u8 | LENGTH_MASK;

// Note: for loops aren't allowed in `const fn`, hence the while.
// Note: Iterating forward results in badly optimized code, because the compiler tries to
// unroll the loop.
let text = text.as_bytes();
let mut i = len;
while i > 0 {
buffer[i - 1] = text[i - 1];
i -= 1;
}

InlineBuffer(buffer)
}

/// Returns an empty [`InlineBuffer`]
#[inline(always)]
pub const fn empty() -> Self {
Self::new_const("")
}

/// Set's the length of the content for this [`InlineBuffer`]
///
/// # SAFETY:
/// * The caller must guarantee that `len` bytes in the buffer are valid UTF-8
#[inline]
pub unsafe fn set_len(&mut self, len: usize) {
debug_assert!(len <= MAX_SIZE);

// If `length` == MAX_SIZE, then we infer the length to be the capacity of the buffer. We
// can infer this because the way we encode length doesn't overlap with any valid UTF-8
// bytes
if len < MAX_SIZE {
self.0[MAX_SIZE - 1] = len as u8 | LENGTH_MASK;
}
}
pub fn get_len(&self) -> usize {
let len = self.0[MAX_SIZE - 1] & !LENGTH_MASK;
if len == MAX_SIZE as u8 { MAX_SIZE } else { len as usize }
}

#[inline(always)]
pub fn copy(&self) -> Self {
InlineBuffer(self.0)
}

pub unsafe fn as_smart_string(&self) -> SmartString {
transmute::<Self, SmartString>(*self)
}
pub unsafe fn as_str(&self) -> &str {
let len = self.get_len();
let ptr = self.0.as_ptr() as *const u8;
std::str::from_utf8_unchecked(std::slice::from_raw_parts(ptr, len))
}
}
205 changes: 205 additions & 0 deletions projects/module-path/src/smart/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
use std::{
fmt::{Debug, Formatter, UpperHex},
mem::transmute,
str::from_utf8_unchecked,
};

use compact_str::CompactString;

use crate::{InlineBuffer, StringManager, MAX_SIZE, STRING_MANAGER};

pub mod inlined;
pub mod on_heap;

/// A `Copy` string handle made of a pointer-sized word, a length word and a trailing
/// `kind` byte that selects how the other fields are interpreted (see `SmartStringKind`).
///
/// Conceptual layout on a 64-bit target:
///
/// ```text
/// pointer: usize             8 bytes
/// length:  usize             8 bytes
/// fill32 | fill16 | fill8    7 filler bytes
/// kind:    u8                1 byte
/// ```
///
/// `fill32` is only present on 64-bit targets.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct SmartString {
/// Data address for static strings, or the interner id for managed strings.
pointer: usize,
/// Byte length of the text, as stored by the static, managed and heap constructors.
length: usize,
#[cfg(target_pointer_width = "64")]
fill32: u32,
fill16: u16,
fill8: u8,
kind: u8,
}

impl UpperHex for SmartString {
    /// Writes every byte returned by `as_bytes` as two upper-case hexadecimal digits,
    /// stopping at the first formatter error.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        self.as_bytes().iter().try_for_each(|octet| write!(f, "{:02X}", octet))
    }
}

// impl Debug for SmartString {
// fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
// f.debug_struct("SmartStringKind").field("kind", &self.kind).finish()
// }
// }

impl Default for SmartString {
    /// Returns the all-zero value: zero pointer word, zero length, zeroed filler and the
    /// `Inlined` kind byte.
    fn default() -> Self {
        Self {
            pointer: 0,
            length: 0,
            // The field is only declared on 64-bit targets, so it may only be initialised
            // there; without this gate the literal does not compile elsewhere.
            #[cfg(target_pointer_width = "64")]
            fill32: 0,
            fill16: 0,
            fill8: 0,
            kind: SmartStringKind::Inlined as u8,
        }
    }
}

impl SmartString {
#[inline]
pub fn new(str: &str) -> SmartString {
match Self::try_inline(str) {
Some(s) => s,
None => Self::try_managed(str.to_string()),
}
}
/// Create a new managed string
#[inline]
pub fn try_managed(string: String) -> SmartString {
let length = string.len();
let pointer = STRING_MANAGER.insert(string);
Self { pointer, length, fill32: 0, fill16: 0, fill8: 0, kind: SmartStringKind::Managed as u8 }
}
/// Create a intern string without checking if it already exists
#[inline]
pub unsafe fn managed(string: &str) -> SmartString {
Self {
pointer: StringManager::get_hash_key(&string),
length: string.len(),
fill32: 0,
fill16: 0,
fill8: 0,
kind: SmartStringKind::Managed as u8,
}
}
/// Create a new static string, remove the lifetime
#[inline]
pub unsafe fn static_str(s: &'static str) -> SmartString {
let pointer = s.as_ptr() as usize;
Self { pointer, length: s.len(), fill32: 0, fill16: 0, fill8: 0, kind: SmartStringKind::Static as u8 }
}
#[inline]
pub fn try_inline(s: &str) -> Option<SmartString> {
if s.chars().count() > 24 {
return None;
}
unsafe { Some(Self::inlined(s)) }
}
/// Create a new inlined string
/// 192 bits / (char = 8bits) = 24 chars
#[inline]
pub unsafe fn inlined(s: &str) -> SmartString {
unsafe { InlineBuffer::new_const(s).as_smart_string() }
}
/// Create a new string on the heap
#[inline]
pub fn heap(s: &str) -> SmartString {
let pointer = Box::new(s).as_ptr() as usize;
Self { pointer, length: s.len(), fill32: 0, fill16: 0, fill8: 0, kind: 0 }
}
}

impl SmartString {
    /// Decodes the trailing `kind` byte.
    ///
    /// # Panics
    ///
    /// Panics when the byte is not one of the four discriminants `0..=3`.
    ///
    /// NOTE(review): `InlineBuffer::new_const` stores `len | 0b1100_0000` in the last
    /// buffer byte and `as_smart_string` reinterprets that byte as this field, so values
    /// built through `inlined` / `try_inline` presumably end up in the panicking arm —
    /// confirm the intended encoding of inline strings.
    #[inline(always)]
    pub const fn kind(&self) -> SmartStringKind {
        match self.kind {
            0b00 => SmartStringKind::Inlined,
            0b01 => SmartStringKind::Static,
            0b10 => SmartStringKind::Managed,
            0b11 => SmartStringKind::Heap,
            _ => unreachable!(),
        }
    }

    /// Returns the length recorded for this string.
    ///
    /// Static, managed and heap strings report their `length` field. For the inline kind
    /// the `kind` byte itself is returned.
    pub fn len(&self) -> usize {
        match self.kind() {
            // The `kind` byte itself; `kind()` only reports `Inlined` for the value 0.
            SmartStringKind::Inlined => self.kind as usize,
            SmartStringKind::Static | SmartStringKind::Managed | SmartStringKind::Heap => self.length,
        }
    }

    /// Borrows the text of a managed string from `STRING_MANAGER`, or `""` when the id is
    /// not stored there.
    ///
    /// # Panics
    ///
    /// Not implemented yet for inline, static and heap strings (`todo!()`).
    pub fn as_str(&self) -> &str {
        match self.kind() {
            SmartStringKind::Managed => STRING_MANAGER.get(self.pointer).unwrap_or(""),
            SmartStringKind::Inlined | SmartStringKind::Static | SmartStringKind::Heap => todo!(),
        }
    }
}

/// Discriminant stored in the last byte of a `SmartString`.
///
/// The diagrams show the three words of a value; `_` marks bits that are not used by
/// that kind.
#[repr(u8)]
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum SmartStringKind {
    /// Inlined layout: the text lives in the value itself.
    ///
    /// ```text
    /// xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
    /// xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
    /// xxxxxxxx xxxxxxxx xxxxxxxx len_mask + 00
    /// ```
    Inlined = 0,
    /// Static layout: address and length of a `&'static str`.
    ///
    /// ```text
    /// &'static str
    /// len: usize
    /// ________ ________ ________ ______01
    /// ```
    Static = 1,
    /// Managed layout: id of a string held by the string manager, plus its length.
    ///
    /// ```text
    /// u64
    /// usize
    /// ________ ________ ________ ______10
    /// ```
    Managed = 2,
    /// Heap layout: address of a boxed `str`, plus its length.
    ///
    /// ```text
    /// str
    /// usize
    /// box ________ ________ ______11
    /// ```
    Heap = 3,
}

impl From<&'static str> for SmartString {
fn from(s: &'static str) -> Self {
unsafe { SmartString::static_str(s) }
}
}

impl SmartString {
    /// Returns `None` for every kind except `Static`.
    ///
    /// # Panics
    ///
    /// The `Static` case is not implemented yet (`todo!()`).
    pub fn as_static(&self) -> Option<&'static str> {
        if let SmartStringKind::Static = self.kind() {
            todo!()
        }
        None
    }

    /// Not implemented yet.
    ///
    /// # Panics
    ///
    /// Always panics (`todo!()`).
    pub fn as_managed(&self) -> Option<&str> {
        todo!()
    }

    /// Returns the in-memory bytes of this value.
    pub fn as_bytes(self) -> [u8; MAX_SIZE] {
        // SAFETY: `transmute` only compiles when `Self` is exactly `MAX_SIZE` bytes, and
        // any byte value is a valid `u8`.
        unsafe { transmute::<Self, [u8; MAX_SIZE]>(self) }
    }
}
1 change: 1 addition & 0 deletions projects/module-path/src/smart/on_heap.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

Loading

0 comments on commit a12ec42

Please sign in to comment.