diff --git a/.bleep b/.bleep index 4508557..a5b2b10 100644 --- a/.bleep +++ b/.bleep @@ -1 +1 @@ -ff2f3fb1c3c2b52d92fd5ff3b76b6f4d25488fb2 \ No newline at end of file +541a53e91e59e55dc42ea0ec37880b11edd6bfe9 \ No newline at end of file diff --git a/tinyufo/Cargo.toml b/tinyufo/Cargo.toml index f21b201..4b3b2c9 100644 --- a/tinyufo/Cargo.toml +++ b/tinyufo/Cargo.toml @@ -28,6 +28,7 @@ lru = "0" zipf = "7" moka = { version = "0", features = ["sync"] } dhat = "0" +quick_cache = "0.4" [[bench]] name = "bench_perf" diff --git a/tinyufo/benches/bench_hit_ratio.rs b/tinyufo/benches/bench_hit_ratio.rs index 72dacd5..23d0e58 100644 --- a/tinyufo/benches/bench_hit_ratio.rs +++ b/tinyufo/benches/bench_hit_ratio.rs @@ -23,6 +23,7 @@ fn bench_one(zip_exp: f64, cache_size_percent: f32) { let cache_size = (cache_size_percent * ITEMS as f32).round() as usize; let mut lru = lru::LruCache::::new(NonZeroUsize::new(cache_size).unwrap()); let moka = moka::sync::Cache::new(cache_size as u64); + let quick_cache = quick_cache::sync::Cache::new(cache_size); let tinyufo = tinyufo::TinyUfo::new(cache_size, cache_size); let mut rng = thread_rng(); @@ -30,6 +31,7 @@ fn bench_one(zip_exp: f64, cache_size_percent: f32) { let mut lru_hit = 0; let mut moka_hit = 0; + let mut quick_cache_hit = 0; let mut tinyufo_hit = 0; for _ in 0..ITERATIONS { @@ -47,6 +49,12 @@ fn bench_one(zip_exp: f64, cache_size_percent: f32) { moka.insert(key, ()); } + if quick_cache.get(&key).is_some() { + quick_cache_hit += 1; + } else { + quick_cache.insert(key, ()); + } + if tinyufo.get(&key).is_some() { tinyufo_hit += 1; } else { @@ -56,42 +64,46 @@ fn bench_one(zip_exp: f64, cache_size_percent: f32) { print!("{:.2}%\t\t", lru_hit as f32 / ITERATIONS as f32 * 100.0); print!("{:.2}%\t\t", moka_hit as f32 / ITERATIONS as f32 * 100.0); + print!( + "{:.2}%\t\t", + quick_cache_hit as f32 / ITERATIONS as f32 * 100.0 + ); println!("{:.2}%", tinyufo_hit as f32 / ITERATIONS as f32 * 100.0); } /* cargo bench --bench bench_hit_ratio -zipf & cache size lru moka TinyUFO -0.90, 0.005 19.23% 33.46% 33.35% -0.90, 0.01 26.21% 37.88% 40.10% -0.90, 0.05 45.59% 55.34% 57.81% -0.90, 0.1 55.73% 64.22% 66.34% -0.90, 0.25 71.18% 77.15% 78.53% -1.00, 0.005 31.09% 45.65% 45.13% -1.00, 0.01 39.17% 50.69% 52.23% -1.00, 0.05 58.73% 66.95% 68.81% -1.00, 0.1 67.57% 74.35% 75.93% -1.00, 0.25 79.91% 84.34% 85.27% -1.05, 0.005 37.68% 51.77% 51.26% -1.05, 0.01 46.11% 57.07% 58.41% -1.05, 0.05 65.04% 72.33% 73.91% -1.05, 0.1 73.11% 78.96% 80.22% -1.05, 0.25 83.77% 87.45% 88.16% -1.10, 0.005 44.48% 57.86% 57.25% -1.10, 0.01 52.97% 63.18% 64.23% -1.10, 0.05 70.94% 77.27% 78.57% -1.10, 0.1 78.11% 83.05% 84.06% -1.10, 0.25 87.08% 90.06% 90.62% -1.50, 0.005 85.25% 89.89% 89.68% -1.50, 0.01 89.88% 92.79% 92.94% -1.50, 0.05 96.04% 97.09% 97.25% -1.50, 0.1 97.52% 98.17% 98.26% -1.50, 0.25 98.81% 99.09% 99.10% +zipf & cache size lru moka QuickC TinyUFO +0.90, 0.005 19.24% 33.43% 32.33% 33.35% +0.90, 0.01 26.23% 37.86% 38.80% 40.06% +0.90, 0.05 45.58% 55.13% 55.71% 57.80% +0.90, 0.1 55.72% 64.15% 64.01% 66.36% +0.90, 0.25 71.16% 77.12% 75.92% 78.53% +1.00, 0.005 31.08% 45.68% 44.07% 45.15% +1.00, 0.01 39.17% 50.80% 50.90% 52.30% +1.00, 0.05 58.71% 66.92% 67.09% 68.79% +1.00, 0.1 67.59% 74.28% 74.00% 75.92% +1.00, 0.25 79.94% 84.35% 83.45% 85.28% +1.05, 0.005 37.66% 51.78% 50.13% 51.12% +1.05, 0.01 46.07% 57.13% 57.07% 58.41% +1.05, 0.05 65.06% 72.37% 72.41% 73.93% +1.05, 0.1 73.13% 78.97% 78.60% 80.24% +1.05, 0.25 83.74% 87.41% 86.68% 88.14% +1.10, 0.005 44.49% 57.84% 56.16% 57.28% +1.10, 0.01 52.97% 63.19% 62.99% 64.24% +1.10, 0.05 70.95% 77.24% 77.26% 78.55% +1.10, 0.1 78.05% 82.86% 82.66% 84.01% +1.10, 0.25 87.12% 90.10% 89.51% 90.66% +1.50, 0.005 85.27% 89.92% 89.08% 89.69% +1.50, 0.01 89.86% 92.77% 92.44% 92.94% +1.50, 0.05 96.01% 97.08% 96.99% 97.23% +1.50, 0.1 97.51% 98.15% 98.08% 98.24% +1.50, 0.25 98.81% 99.09% 99.03% 99.09% */ fn main() { - println!("zipf & cache size\t\tlru\t\tmoka\t\tTinyUFO",); + println!("zipf & cache size\t\tlru\t\tmoka\t\tQuickC\t\tTinyUFO",); for zif_exp in [0.9, 1.0, 1.05, 1.1, 1.5] { for cache_capacity in [0.005, 0.01, 0.05, 0.1, 0.25] { bench_one(zif_exp, cache_capacity); diff --git a/tinyufo/benches/bench_memory.rs b/tinyufo/benches/bench_memory.rs index 9d49210..271fe12 100644 --- a/tinyufo/benches/bench_memory.rs +++ b/tinyufo/benches/bench_memory.rs @@ -52,6 +52,22 @@ fn bench_moka(zip_exp: f64, items: usize, cache_size_percent: f32) { } } +fn bench_quick_cache(zip_exp: f64, items: usize, cache_size_percent: f32) { + let cache_size = (cache_size_percent * items as f32).round() as usize; + let quick_cache = quick_cache::sync::Cache::new(cache_size); + + let mut rng = thread_rng(); + let zipf = zipf::ZipfDistribution::new(items, zip_exp).unwrap(); + + for _ in 0..ITERATIONS { + let key = zipf.sample(&mut rng) as u64; + + if quick_cache.get(&key).is_none() { + quick_cache.insert(key, ()); + } + } +} + fn bench_tinyufo(zip_exp: f64, items: usize, cache_size_percent: f32) { let cache_size = (cache_size_percent * items as f32).round() as usize; let tinyufo = tinyufo::TinyUfo::new(cache_size, (cache_size as f32 * 1.0) as usize); @@ -92,6 +108,8 @@ lru dhat: At t-gmax: 9,408 bytes in 106 blocks moka dhat: At t-gmax: 354,232 bytes in 1,581 blocks +QuickCache +dhat: At t-gmax: 11,840 bytes in 8 blocks TinyUFO dhat: At t-gmax: 37,337 bytes in 351 blocks TinyUFO compat @@ -102,6 +120,8 @@ lru dhat: At t-gmax: 128,512 bytes in 1,004 blocks moka dhat: At t-gmax: 535,320 bytes in 7,278 blocks +QuickCache +dhat: At t-gmax: 93,000 bytes in 66 blocks TinyUFO dhat: At t-gmax: 236,053 bytes in 2,182 blocks TinyUFO Compact @@ -112,6 +132,8 @@ lru dhat: At t-gmax: 1,075,648 bytes in 10,004 blocks moka dhat: At t-gmax: 2,489,088 bytes in 62,374 blocks +QuickCache +dhat: At t-gmax: 863,752 bytes in 66 blocks TinyUFO dhat: At t-gmax: 2,290,635 bytes in 20,467 blocks TinyUFO @@ -133,6 +155,12 @@ fn main() { println!("\nmoka"); } + { + let _profiler = dhat::Profiler::new_heap(); + bench_quick_cache(1.05, items, 0.1); + println!("\nQuickCache"); + } + { let _profiler = dhat::Profiler::new_heap(); bench_tinyufo(1.05, items, 0.1); diff --git a/tinyufo/benches/bench_perf.rs b/tinyufo/benches/bench_perf.rs index bee8a11..9b74000 100644 --- a/tinyufo/benches/bench_perf.rs +++ b/tinyufo/benches/bench_perf.rs @@ -14,7 +14,7 @@ use rand::prelude::*; use std::num::NonZeroUsize; -use std::sync::Mutex; +use std::sync::{Barrier, Mutex}; use std::thread; use std::time::Instant; @@ -31,6 +31,7 @@ Below is from Linux + Ryzen 5 7600 CPU lru read total 150.423567ms, 30ns avg per operation, 33239472 ops per second moka read total 462.133322ms, 92ns avg per operation, 10819389 ops per second +quick_cache read total 125.618216ms, 25ns avg per operation, 39803144 ops per second tinyufo read total 199.007359ms, 39ns avg per operation, 25124698 ops per second tinyufo compact read total 331.145859ms, 66ns avg per operation, 15099087 ops per second @@ -42,6 +43,10 @@ moka read total 2.742258211s, 548ns avg per operation, 1823314 ops per second ... total 14072430 ops per second +quick_cache read total 1.186566627s, 237ns avg per operation, 4213838 ops per second +... +total 33694776 ops per second + tinyufo read total 208.346855ms, 41ns avg per operation, 23998444 ops per second ... total 148691408 ops per second @@ -58,6 +63,10 @@ moka mixed read/write 2.368500882s, 473ns avg per operation, 2111040 ops per sec ... total 16557962 ops per second +quick_cache mixed read/write 838.072588ms, 167ns avg per operation, 5966070 ops per second 315051 misses +... +total 47698472 ops per second + tinyufo mixed read/write 456.134531ms, 91ns avg per operation, 10961678 ops per second, 294977 misses ... total 80865792 ops per second @@ -68,9 +77,11 @@ total 62600844 ops per second */ fn main() { + println!("Note: these performance numbers vary a lot across different CPUs and OSes."); // we don't bench eviction here so make the caches large enough to hold all let lru = Mutex::new(lru::LruCache::::unbounded()); let moka = moka::sync::Cache::new(ITEMS as u64 + 10); + let quick_cache = quick_cache::sync::Cache::new(ITEMS + 10); let tinyufo = tinyufo::TinyUfo::new(ITEMS + 10, 10); let tinyufo_compact = tinyufo::TinyUfo::new_compact(ITEMS + 10, 10); @@ -78,6 +89,7 @@ fn main() { for i in 0..ITEMS { lru.lock().unwrap().put(i as u64, ()); moka.insert(i as u64, ()); + quick_cache.insert(i as u64, ()); tinyufo.put(i as u64, (), 1); tinyufo_compact.put(i as u64, (), 1); } @@ -108,6 +120,17 @@ fn main() { (ITERATIONS as f32 / elapsed.as_secs_f32()) as u32 ); + let before = Instant::now(); + for _ in 0..ITERATIONS { + quick_cache.get(&(zipf.sample(&mut rng) as u64)); + } + let elapsed = before.elapsed(); + println!( + "quick_cache read total {elapsed:?}, {:?} avg per operation, {} ops per second", + elapsed / ITERATIONS as u32, + (ITERATIONS as f32 / elapsed.as_secs_f32()) as u32 + ); + let before = Instant::now(); for _ in 0..ITERATIONS { tinyufo.get(&(zipf.sample(&mut rng) as u64)); @@ -131,13 +154,14 @@ fn main() { ); // concurrent - + let wg = Barrier::new(THREADS); let before = Instant::now(); thread::scope(|s| { for _ in 0..THREADS { s.spawn(|| { let mut rng = thread_rng(); let zipf = zipf::ZipfDistribution::new(ITEMS, 1.03).unwrap(); + wg.wait(); let before = Instant::now(); for _ in 0..ITERATIONS { lru.lock().unwrap().get(&(zipf.sample(&mut rng) as u64)); @@ -157,12 +181,14 @@ fn main() { (ITERATIONS as f32 * THREADS as f32 / elapsed.as_secs_f32()) as u32 ); + let wg = Barrier::new(THREADS); let before = Instant::now(); thread::scope(|s| { for _ in 0..THREADS { s.spawn(|| { let mut rng = thread_rng(); let zipf = zipf::ZipfDistribution::new(ITEMS, 1.03).unwrap(); + wg.wait(); let before = Instant::now(); for _ in 0..ITERATIONS { moka.get(&(zipf.sample(&mut rng) as u64)); @@ -182,12 +208,41 @@ fn main() { (ITERATIONS as f32 * THREADS as f32 / elapsed.as_secs_f32()) as u32 ); + let wg = Barrier::new(THREADS); let before = Instant::now(); thread::scope(|s| { for _ in 0..THREADS { s.spawn(|| { let mut rng = thread_rng(); let zipf = zipf::ZipfDistribution::new(ITEMS, 1.03).unwrap(); + wg.wait(); + let before = Instant::now(); + for _ in 0..ITERATIONS { + quick_cache.get(&(zipf.sample(&mut rng) as u64)); + } + let elapsed = before.elapsed(); + println!( + "quick_cache read total {elapsed:?}, {:?} avg per operation, {} ops per second", + elapsed / ITERATIONS as u32, + (ITERATIONS as f32 / elapsed.as_secs_f32()) as u32 + ); + }); + } + }); + let elapsed = before.elapsed(); + println!( + "total {} ops per second", + (ITERATIONS as f32 * THREADS as f32 / elapsed.as_secs_f32()) as u32 + ); + + let wg = Barrier::new(THREADS); + let before = Instant::now(); + thread::scope(|s| { + for _ in 0..THREADS { + s.spawn(|| { + let mut rng = thread_rng(); + let zipf = zipf::ZipfDistribution::new(ITEMS, 1.03).unwrap(); + wg.wait(); let before = Instant::now(); for _ in 0..ITERATIONS { tinyufo.get(&(zipf.sample(&mut rng) as u64)); @@ -207,12 +262,14 @@ fn main() { (ITERATIONS as f32 * THREADS as f32 / elapsed.as_secs_f32()) as u32 ); + let wg = Barrier::new(THREADS); let before = Instant::now(); thread::scope(|s| { for _ in 0..THREADS { s.spawn(|| { let mut rng = thread_rng(); let zipf = zipf::ZipfDistribution::new(ITEMS, 1.03).unwrap(); + wg.wait(); let before = Instant::now(); for _ in 0..ITERATIONS { tinyufo_compact.get(&(zipf.sample(&mut rng) as u64)); @@ -240,6 +297,7 @@ fn main() { let lru = Mutex::new(lru::LruCache::::new( NonZeroUsize::new(CACHE_SIZE).unwrap(), )); + let wg = Barrier::new(THREADS); let before = Instant::now(); thread::scope(|s| { for _ in 0..THREADS { @@ -247,6 +305,7 @@ fn main() { let mut miss_count = 0; let mut rng = thread_rng(); let zipf = zipf::ZipfDistribution::new(items, ZIPF_EXP).unwrap(); + wg.wait(); let before = Instant::now(); for _ in 0..ITERATIONS { let key = zipf.sample(&mut rng) as u64; @@ -272,7 +331,7 @@ fn main() { ); let moka = moka::sync::Cache::new(CACHE_SIZE as u64); - + let wg = Barrier::new(THREADS); let before = Instant::now(); thread::scope(|s| { for _ in 0..THREADS { @@ -280,6 +339,7 @@ fn main() { let mut miss_count = 0; let mut rng = thread_rng(); let zipf = zipf::ZipfDistribution::new(items, ZIPF_EXP).unwrap(); + wg.wait(); let before = Instant::now(); for _ in 0..ITERATIONS { let key = zipf.sample(&mut rng) as u64; @@ -303,7 +363,8 @@ fn main() { (ITERATIONS as f32 * THREADS as f32 / elapsed.as_secs_f32()) as u32 ); - let tinyufo = tinyufo::TinyUfo::new(CACHE_SIZE, CACHE_SIZE); + let quick_cache = quick_cache::sync::Cache::new(CACHE_SIZE); + let wg = Barrier::new(THREADS); let before = Instant::now(); thread::scope(|s| { for _ in 0..THREADS { @@ -311,6 +372,40 @@ fn main() { let mut miss_count = 0; let mut rng = thread_rng(); let zipf = zipf::ZipfDistribution::new(items, ZIPF_EXP).unwrap(); + wg.wait(); + let before = Instant::now(); + for _ in 0..ITERATIONS { + let key = zipf.sample(&mut rng) as u64; + if quick_cache.get(&key).is_none() { + quick_cache.insert(key, ()); + miss_count += 1; + } + } + let elapsed = before.elapsed(); + println!( + "quick_cache mixed read/write {elapsed:?}, {:?} avg per operation, {} ops per second {miss_count} misses", + elapsed / ITERATIONS as u32, + (ITERATIONS as f32 / elapsed.as_secs_f32()) as u32 + ); + }); + } + }); + let elapsed = before.elapsed(); + println!( + "total {} ops per second", + (ITERATIONS as f32 * THREADS as f32 / elapsed.as_secs_f32()) as u32 + ); + + let tinyufo = tinyufo::TinyUfo::new(CACHE_SIZE, CACHE_SIZE); + let wg = Barrier::new(THREADS); + let before = Instant::now(); + thread::scope(|s| { + for _ in 0..THREADS { + s.spawn(|| { + let mut miss_count = 0; + let mut rng = thread_rng(); + let zipf = zipf::ZipfDistribution::new(items, ZIPF_EXP).unwrap(); + wg.wait(); let before = Instant::now(); for _ in 0..ITERATIONS { let key = zipf.sample(&mut rng) as u64; @@ -336,6 +431,7 @@ fn main() { ); let tinyufo_compact = tinyufo::TinyUfo::new(CACHE_SIZE, CACHE_SIZE); + let wg = Barrier::new(THREADS); let before = Instant::now(); thread::scope(|s| { for _ in 0..THREADS { @@ -343,6 +439,7 @@ fn main() { let mut miss_count = 0; let mut rng = thread_rng(); let zipf = zipf::ZipfDistribution::new(items, ZIPF_EXP).unwrap(); + wg.wait(); let before = Instant::now(); for _ in 0..ITERATIONS { let key = zipf.sample(&mut rng) as u64;