Skip to content

Commit

Permalink
TinyUFO: add the option to use sharded skip list for storage
Browse files Browse the repository at this point in the history
This option makes it more memory efficient but a bit slower.
  • Loading branch information
eaufavor committed Mar 22, 2024
1 parent b9d4428 commit ab86012
Show file tree
Hide file tree
Showing 10 changed files with 476 additions and 111 deletions.
2 changes: 1 addition & 1 deletion .bleep
Original file line number Diff line number Diff line change
@@ -1 +1 @@
deb3c5409e938ec9c7d0da9b7a2d331eabbb2cd5
b1c09703606d32b02f24d2e77d82936ba95e8064
2 changes: 1 addition & 1 deletion pingora-memory-cache/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ pub struct MemoryCache<K: Hash, T: Clone> {
pub(crate) hasher: RandomState,
}

impl<K: Hash, T: Clone + Send + Sync> MemoryCache<K, T> {
impl<K: Hash, T: Clone + Send + Sync + 'static> MemoryCache<K, T> {
/// Create a new [MemoryCache] with the given size.
pub fn new(size: usize) -> Self {
MemoryCache {
Expand Down
6 changes: 3 additions & 3 deletions pingora-memory-cache/src/read_through.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ where
impl<K, T, CB, S> RTCache<K, T, CB, S>
where
K: Hash + Send,
T: Clone + Send + Sync,
T: Clone + Send + Sync + 'static,
{
/// Create a new [RTCache] of given size. `lock_age` defines how long a lock is valid for.
/// `lock_timeout` is used to stop a lookup from holding on to the key for too long.
Expand All @@ -142,7 +142,7 @@ where
impl<K, T, CB, S> RTCache<K, T, CB, S>
where
K: Hash + Send,
T: Clone + Send + Sync,
T: Clone + Send + Sync + 'static,
CB: Lookup<K, T, S>,
{
/// Query the cache for a given value. If it exists and no TTL is configured initially, it will
Expand Down Expand Up @@ -288,7 +288,7 @@ where
impl<K, T, CB, S> RTCache<K, T, CB, S>
where
K: Hash + Send,
T: Clone + Send + Sync,
T: Clone + Send + Sync + 'static,
CB: MultiLookup<K, T, S>,
{
/// Same behavior as [RTCache::get] but for an arbitrary amount of keys.
Expand Down
1 change: 1 addition & 0 deletions tinyufo/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ ahash = { workspace = true }
flurry = "<0.5.0" # Try not to require Rust 1.71
parking_lot = "0"
crossbeam-queue = "0"
crossbeam-skiplist = "0"

[dev-dependencies]
rand = "0"
Expand Down
14 changes: 7 additions & 7 deletions tinyufo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,12 @@ Because of TinyUFO's lock-free design, it greatly outperforms the others.

### Memory overhead

The table below show the memory allocation (in bytes) of the compared cache library under certain workloads to store zero-sized assets.
TinyUFO provides a compact mode to trade raw read speed for more memory efficiency. Whether the saving is worth the trade-off depends on the actual asset sizes and the workload. For small in-memory assets, the saved memory means more things can be cached.

| cache size | TinyUFO | LRU | moka |
| -------- | ------- | ------- | ------ |
| 100 | 39,409 | 9,408 | 354,376
| 1000 | 236,053 | 128,512 | 535,888
| 10000 | 2,290,635 | 1,075,648 | 2,489,088
The table below shows the memory allocation (in bytes) of the compared cache libraries under certain workloads to store zero-sized assets.

Whether these overheads matter depends on the actual sizes and volume of the assets. The more advanced algorithms are likely to be less memory efficient than the simple LRU.
| cache size | TinyUFO | TinyUFO compact | LRU | moka |
| -------- | ------- | ------- | ------- | ------ |
| 100 | 39,409 | 19,000 | 9,408 | 354,376
| 1000 | 236,053 | 86,352 | 128,512 | 535,888
| 10000 | 2,290,635 | 766,024| 1,075,648 | 2,489,088
28 changes: 28 additions & 0 deletions tinyufo/benches/bench_memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,22 @@ fn bench_tinyufo(zip_exp: f64, items: usize, cache_size_percent: f32) {
}
}

/// Memory benchmark for the compact (sharded skip list) TinyUFO storage.
///
/// `zip_exp` is the Zipf distribution exponent, `items` the total key space,
/// and `cache_size_percent` the cache capacity as a fraction of `items`.
/// Mirrors `bench_tinyufo` so dhat numbers are directly comparable.
fn bench_tinyufo_compact(zip_exp: f64, items: usize, cache_size_percent: f32) {
    let cache_size = (cache_size_percent * items as f32).round() as usize;
    // The estimated item count equals the cache size. The original
    // `(cache_size as f32 * 1.0) as usize` was a no-op multiply through an
    // f32 round-trip (which can even lose precision for very large sizes);
    // pass the value directly.
    let tinyufo = tinyufo::TinyUfo::new_compact(cache_size, cache_size);

    let mut rng = thread_rng();
    let zipf = zipf::ZipfDistribution::new(items, zip_exp).unwrap();

    // Read-through pattern: insert (weight 1) only on a cache miss.
    for _ in 0..ITERATIONS {
        let key = zipf.sample(&mut rng) as u64;

        if tinyufo.get(&key).is_none() {
            tinyufo.put(key, (), 1);
        }
    }
}

/*
cargo bench --bench bench_memory
Expand All @@ -78,6 +94,8 @@ moka
dhat: At t-gmax: 354,232 bytes in 1,581 blocks
TinyUFO
dhat: At t-gmax: 37,337 bytes in 351 blocks
TinyUFO compact
dhat: At t-gmax: 19,000 bytes in 60 blocks
total items 10000, cache size 10%
lru
Expand All @@ -86,6 +104,8 @@ moka
dhat: At t-gmax: 535,320 bytes in 7,278 blocks
TinyUFO
dhat: At t-gmax: 236,053 bytes in 2,182 blocks
TinyUFO Compact
dhat: At t-gmax: 86,352 bytes in 1,128 blocks
total items 100000, cache size 10%
lru
Expand All @@ -94,6 +114,8 @@ moka
dhat: At t-gmax: 2,489,088 bytes in 62,374 blocks
TinyUFO
dhat: At t-gmax: 2,290,635 bytes in 20,467 blocks
TinyUFO Compact
dhat: At t-gmax: 766,024 bytes in 10,421 blocks
*/

fn main() {
Expand All @@ -116,5 +138,11 @@ fn main() {
bench_tinyufo(1.05, items, 0.1);
println!("\nTinyUFO");
}

{
let _profiler = dhat::Profiler::new_heap();
bench_tinyufo_compact(1.05, items, 0.1);
println!("\nTinyUFO Compact");
}
}
}
79 changes: 79 additions & 0 deletions tinyufo/benches/bench_perf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ Below is from Linux + Ryzen 5 7600 CPU
lru read total 150.423567ms, 30ns avg per operation, 33239472 ops per second
moka read total 462.133322ms, 92ns avg per operation, 10819389 ops per second
tinyufo read total 199.007359ms, 39ns avg per operation, 25124698 ops per second
tinyufo compact read total 331.145859ms, 66ns avg per operation, 15099087 ops per second
lru read total 5.402631847s, 1.08µs avg per operation, 925474 ops per second
...
Expand All @@ -45,6 +46,10 @@ tinyufo read total 208.346855ms, 41ns avg per operation, 23998444 ops per second
...
total 148691408 ops per second
tinyufo compact read total 539.403037ms, 107ns avg per operation, 9269507 ops per second
...
total 74130632 ops per second
lru mixed read/write 5.500309876s, 1.1µs avg per operation, 909039 ops per second, 407431 misses
...
total 6846743 ops per second
Expand All @@ -56,19 +61,25 @@ total 16557962 ops per second
tinyufo mixed read/write 456.134531ms, 91ns avg per operation, 10961678 ops per second, 294977 misses
...
total 80865792 ops per second
tinyufo compact mixed read/write 638.770053ms, 127ns avg per operation, 7827543 ops per second, 294641 misses
...
total 62600844 ops per second
*/

fn main() {
// we don't bench eviction here so make the caches large enough to hold all
let lru = Mutex::new(lru::LruCache::<u64, ()>::unbounded());
let moka = moka::sync::Cache::new(ITEMS as u64 + 10);
let tinyufo = tinyufo::TinyUfo::new(ITEMS + 10, 10);
let tinyufo_compact = tinyufo::TinyUfo::new_compact(ITEMS + 10, 10);

// populate first, then we bench access/promotion
for i in 0..ITEMS {
lru.lock().unwrap().put(i as u64, ());
moka.insert(i as u64, ());
tinyufo.put(i as u64, (), 1);
tinyufo_compact.put(i as u64, (), 1);
}

// single thread
Expand Down Expand Up @@ -108,6 +119,17 @@ fn main() {
(ITERATIONS as f32 / elapsed.as_secs_f32()) as u32
);

let before = Instant::now();
for _ in 0..ITERATIONS {
tinyufo_compact.get(&(zipf.sample(&mut rng) as u64));
}
let elapsed = before.elapsed();
println!(
"tinyufo compact read total {elapsed:?}, {:?} avg per operation, {} ops per second",
elapsed / ITERATIONS as u32,
(ITERATIONS as f32 / elapsed.as_secs_f32()) as u32
);

// concurrent

let before = Instant::now();
Expand Down Expand Up @@ -185,6 +207,31 @@ fn main() {
(ITERATIONS as f32 * THREADS as f32 / elapsed.as_secs_f32()) as u32
);

let before = Instant::now();
thread::scope(|s| {
for _ in 0..THREADS {
s.spawn(|| {
let mut rng = thread_rng();
let zipf = zipf::ZipfDistribution::new(ITEMS, 1.03).unwrap();
let before = Instant::now();
for _ in 0..ITERATIONS {
tinyufo_compact.get(&(zipf.sample(&mut rng) as u64));
}
let elapsed = before.elapsed();
println!(
"tinyufo compact read total {elapsed:?}, {:?} avg per operation, {} ops per second",
elapsed / ITERATIONS as u32,
(ITERATIONS as f32 / elapsed.as_secs_f32()) as u32
);
});
}
});
let elapsed = before.elapsed();
println!(
"total {} ops per second",
(ITERATIONS as f32 * THREADS as f32 / elapsed.as_secs_f32()) as u32
);

///// bench mixed read and write /////
const CACHE_SIZE: usize = 1000;
let items: usize = 10000;
Expand Down Expand Up @@ -287,4 +334,36 @@ fn main() {
"total {} ops per second",
(ITERATIONS as f32 * THREADS as f32 / elapsed.as_secs_f32()) as u32
);

let tinyufo_compact = tinyufo::TinyUfo::new(CACHE_SIZE, CACHE_SIZE);
let before = Instant::now();
thread::scope(|s| {
for _ in 0..THREADS {
s.spawn(|| {
let mut miss_count = 0;
let mut rng = thread_rng();
let zipf = zipf::ZipfDistribution::new(items, ZIPF_EXP).unwrap();
let before = Instant::now();
for _ in 0..ITERATIONS {
let key = zipf.sample(&mut rng) as u64;
if tinyufo_compact.get(&key).is_none() {
tinyufo_compact.put(key, (), 1);
miss_count +=1;
}
}
let elapsed = before.elapsed();
println!(
"tinyufo compact mixed read/write {elapsed:?}, {:?} avg per operation, {} ops per second, {miss_count} misses",
elapsed / ITERATIONS as u32,
(ITERATIONS as f32 / elapsed.as_secs_f32()) as u32,
);
});
}
});

let elapsed = before.elapsed();
println!(
"total {} ops per second",
(ITERATIONS as f32 * THREADS as f32 / elapsed.as_secs_f32()) as u32
);
}
Loading

0 comments on commit ab86012

Please sign in to comment.