// evmone: Fast Ethereum Virtual Machine implementation
// Copyright 2024 The evmone Authors.
// SPDX-License-Identifier: Apache-2.0
//
// NOTE(review): reconstructed from patch 738e0d75 "LRU cache utility" (Paweł Bylica,
// 2024-11-07), which adds a generic single-threaded LRU cache to be used for caching
// code and code analysis, and registers lru_cache.hpp in lib/evmone/CMakeLists.txt.
// The extracted source had every '<...>' token stripped (template parameter lists and
// include targets); they are restored below.
#pragma once

#include <cstddef>
#include <list>
#include <optional>
#include <unordered_map>

namespace evmone
{
/// Least Recently Used (LRU) cache.
///
/// A map of Key to Value with a fixed capacity. When the cache is full, a newly inserted entry
/// replaces (evicts) the least recently used entry.
template <typename Key, typename Value>
class LRUCache
{
    struct LRUEntry
    {
        /// Reference to the existing key in the map.
        ///
        /// This is needed to get the LRU element in the map when eviction is needed.
        /// Pointers to node-based map entries are always valid.
        /// TODO: Optimal solution would be to use the map iterator. They are also always valid
        /// because the map capacity is reserved up front and rehashing never happens.
        /// However, the type definition would be recursive: Map(List(Map::iterator)), so we need
        /// to use some kind of type erasure. We prototyped such implementation, but decided not
        /// to include it in the first version. Similar solution is also described in
        /// https://stackoverflow.com/a/54808013/725174.
        const Key& key;

        /// The cached value.
        Value value;
    };

    using LRUList = std::list<LRUEntry>;
    using LRUIterator = typename LRUList::iterator;
    using Map = std::unordered_map<Key, LRUIterator>;

    /// The fixed capacity of the cache.
    const size_t capacity_;

    /// The list to order the cache entries by the usage. The front element is the least recently
    /// used entry.
    ///
    /// In classic implementations the order in the list is reversed (the front element is the most
    /// recently used entry). We decided to keep the order as is because
    /// it simplifies the implementation and better fits the underlying list structure.
    ///
    /// TODO: The intrusive list works better here but such implementation variant has been omitted
    /// from the initial version.
    LRUList lru_list_;

    /// The map of Keys to Values via the LRU list indirection.
    ///
    /// The Value don't have to be in the LRU list but instead can be placed in the map directly
    /// next to the LRU iterator. We decided to keep this classic layout because we didn't notice
    /// any performance difference.
    Map map_;

    /// Marks an element as the most recently used by moving it to the back of the LRU list.
    void move_to_back(LRUIterator it) noexcept { lru_list_.splice(lru_list_.end(), lru_list_, it); }

public:
    /// Constructs the LRU cache with the given capacity.
    ///
    /// @param capacity The fixed capacity of the cache.
    explicit LRUCache(size_t capacity) : capacity_(capacity)
    {
        // Reserve map to the full capacity to prevent any rehashing.
        map_.reserve(capacity);
    }

    /// Clears the cache by deleting all the entries.
    void clear() noexcept
    {
        map_.clear();
        lru_list_.clear();
    }

    /// Retrieves the copy of the value associated with the specified key.
    ///
    /// @param key The key of the entry to retrieve.
    /// @return An optional containing the copy of the value if the key is found,
    ///         or an empty optional if not.
    std::optional<Value> get(const Key& key) noexcept
    {
        if (const auto it = map_.find(key); it != map_.end())
        {
            move_to_back(it->second);
            return it->second->value;
        }
        return {};
    }

    /// Inserts or updates the value associated with the specified key.
    ///
    /// @param key   The key of the entry to insert or update.
    /// @param value The value to associate with the key.
    void put(Key key, Value value)
    {
        // Implementation is split into two variants: cache full or not.
        // Once the cache is full, its size never shrinks therefore from now on this variant is
        // always executed.

        if (map_.size() == capacity_)
        {
            // When the cache is full we avoid erase-emplace pattern by using the map's node API.

            using namespace std;  // for swap usage with ADL

            // Get the least recently used element.
            auto lru_it = lru_list_.begin();

            // Extract the map node with the to-be-evicted element and reuse it for the new
            // key-value pair. This makes the operation allocation-free.
            auto node = map_.extract(lru_it->key);
            swap(node.key(), key);
            if (auto [it, inserted, node2] = map_.insert(std::move(node)); !inserted)
            {
                // Failed re-insertion means the element with the new key is already in the cache.
                // Rollback the eviction by re-inserting the node with original key back.
                swap(key, node2.key());
                map_.insert(std::move(node2));

                // Returned iterator points to the element matching the key
                // which value must be updated.
                lru_it = it->second;
            }
            lru_it->value = std::move(value);  // Replace/update the value.
            move_to_back(lru_it);
        }
        else
        {
            // The cache is not full. Insert the new element into the cache.
            if (const auto [it, inserted] = map_.try_emplace(std::move(key)); !inserted)
            {
                // If insertion failed, the key is already in the cache so just update the value.
                it->second->value = std::move(value);
                move_to_back(it->second);
            }
            else
            {
                // After successful insertion also create the LRU list entry and connect it with
                // the map entry. This reference is valid and unchanged through
                // the whole cache lifetime.
                // TODO(clang): no matching constructor for initialization of 'LRUEntry'
                it->second =
                    lru_list_.emplace(lru_list_.end(), LRUEntry{it->first, std::move(value)});
            }
        }
    }
};

}  // namespace evmone
+// SPDX-License-Identifier: Apache-2.0 + +#include "../state/hash_utils.hpp" +#include +#include +#include + +using evmone::hash256; + +namespace +{ +template +void lru_cache_not_found(benchmark::State& state) +{ + const auto capacity = static_cast(state.range(0)); + evmone::LRUCache cache(capacity); + + std::vector keys(capacity + 1, Key{}); + for (size_t i = 0; i < keys.size(); ++i) + keys[i] = static_cast(i); + benchmark::ClobberMemory(); + + for (size_t i = 0; i < capacity; ++i) + cache.put(keys[i], {}); + + const volatile auto key = &keys[capacity]; + + for ([[maybe_unused]] auto _ : state) + { + auto v = cache.get(*key); + benchmark::DoNotOptimize(v); + if (v.has_value()) [[unlikely]] + state.SkipWithError("found"); + } +} +BENCHMARK(lru_cache_not_found)->Arg(5000); +BENCHMARK(lru_cache_not_found>)->Arg(5000); + + +template +void lru_cache_get_same(benchmark::State& state) +{ + const auto capacity = static_cast(state.range(0)); + evmone::LRUCache cache(capacity); + + std::vector keys(capacity, Key{}); + for (size_t i = 0; i < keys.size(); ++i) + keys[i] = static_cast(i); + benchmark::ClobberMemory(); + + for (const auto key : keys) + cache.put(key, {}); + + const volatile auto key = &keys[capacity / 2]; + + for ([[maybe_unused]] auto _ : state) + { + auto v = cache.get(*key); + benchmark::DoNotOptimize(v); + if (!v.has_value()) [[unlikely]] + state.SkipWithError("not found"); + } +} +BENCHMARK(lru_cache_get_same)->Arg(5000); +BENCHMARK(lru_cache_get_same>)->Arg(5000); + + +template +void lru_cache_get(benchmark::State& state) +{ + const auto capacity = static_cast(state.range(0)); + evmone::LRUCache cache(capacity); + + std::vector data(capacity, Key{}); + for (size_t i = 0; i < data.size(); ++i) + data[i] = static_cast(i); + benchmark::ClobberMemory(); + + for (const auto& key : data) + cache.put(key, {}); + + auto key_it = data.begin(); + for ([[maybe_unused]] auto _ : state) + { + auto v = cache.get(*key_it++); + benchmark::DoNotOptimize(v); + if 
(!v.has_value()) [[unlikely]] + state.SkipWithError("not found"); + + if (key_it == data.end()) + key_it = data.begin(); + } +} +BENCHMARK(lru_cache_get)->Arg(5000); +BENCHMARK(lru_cache_get>)->Arg(5000); + + +template +void lru_cache_put_empty(benchmark::State& state) +{ + const auto capacity = static_cast(state.range(0)); + evmone::LRUCache cache(capacity); + + std::vector data(capacity, Key{}); + for (size_t i = 0; i < data.size(); ++i) + data[i] = static_cast(i); + benchmark::ClobberMemory(); + + while (state.KeepRunningBatch(static_cast(capacity))) + { + for (const auto& key : data) + { + cache.put(key, {}); + } + state.PauseTiming(); + cache.clear(); + state.ResumeTiming(); + } +} +BENCHMARK(lru_cache_put_empty)->Arg(5000); +BENCHMARK(lru_cache_put_empty>)->Arg(5000); + + +template +void lru_cache_put_full(benchmark::State& state) +{ + const auto capacity = static_cast(state.range(0)); + evmone::LRUCache cache(capacity); + + std::vector keys(capacity, Key{}); + for (size_t i = 0; i < keys.size(); ++i) + keys[i] = static_cast(i); + benchmark::ClobberMemory(); + + for (const auto& key : keys) + cache.put(key, {}); + + auto key_index = keys.size(); + for ([[maybe_unused]] auto _ : state) + { + cache.put(static_cast(key_index), {}); + ++key_index; + } +} +BENCHMARK(lru_cache_put_full)->Arg(5000); +BENCHMARK(lru_cache_put_full>)->Arg(5000); + +} // namespace diff --git a/test/unittests/CMakeLists.txt b/test/unittests/CMakeLists.txt index 03014aaa43..ad98a9b0ee 100644 --- a/test/unittests/CMakeLists.txt +++ b/test/unittests/CMakeLists.txt @@ -48,6 +48,7 @@ target_sources( exportable_fixture.cpp instructions_test.cpp jumpdest_analysis_test.cpp + lru_cache_test.cpp precompiles_blake2b_test.cpp precompiles_bls_test.cpp precompiles_kzg_test.cpp diff --git a/test/unittests/lru_cache_test.cpp b/test/unittests/lru_cache_test.cpp new file mode 100644 index 0000000000..b2ebd2f832 --- /dev/null +++ b/test/unittests/lru_cache_test.cpp @@ -0,0 +1,219 @@ +// evmone: Fast 
Ethereum Virtual Machine implementation +// Copyright 2024 The evmone Authors. +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include +#include +#include + +using evmone::LRUCache; + +TEST(lru_cache, capacity1) +{ + LRUCache c(1); + c.put('a', 2); + EXPECT_EQ(c.get('a'), 2); +} + +TEST(lru_cache, not_found) +{ + LRUCache c(0); + EXPECT_EQ(c.get('a'), std::nullopt); +} + +TEST(lru_cache, evict_capacity1) +{ + LRUCache c(1); + c.put('a', 2); + c.put('b', 3); + EXPECT_EQ(c.get('a'), std::nullopt); + EXPECT_EQ(c.get('b'), 3); +} + +TEST(lru_cache, double_evict_capacity1) +{ + LRUCache c(1); + c.put('a', 2); + c.put('b', 3); + c.put('c', 4); // second eviction works properly provided the list iterator has valid key + EXPECT_EQ(c.get('c'), 4); +} + +TEST(lru_cache, evict_capacity3) +{ + LRUCache c(3); + c.put('a', 1); + c.put('b', 2); + c.put('c', 3); + EXPECT_EQ(c.get('a'), 1); + EXPECT_EQ(c.get('b'), 2); + EXPECT_EQ(c.get('c'), 3); + + c.put('d', 4); + EXPECT_EQ(c.get('a'), std::nullopt); + EXPECT_EQ(c.get('b'), 2); + EXPECT_EQ(c.get('c'), 3); + EXPECT_EQ(c.get('d'), 4); +} + +TEST(lru_cache, evict_get3) +{ + LRUCache> c(3); + c.put('a', 1); + c.put('b', 2); + c.put('c', 3); + EXPECT_EQ(*c.get('c'), 3); + EXPECT_EQ(*c.get('b'), 2); + EXPECT_EQ(*c.get('a'), 1); + + c.put('d', 4); + EXPECT_EQ(c.get('c'), std::nullopt); + EXPECT_EQ(*c.get('b'), 2); + EXPECT_EQ(*c.get('a'), 1); + EXPECT_EQ(*c.get('d'), 4); + + c.put('e', 5); + EXPECT_EQ(c.get('c'), std::nullopt); + EXPECT_EQ(c.get('b'), std::nullopt); + EXPECT_EQ(*c.get('a'), 1); + EXPECT_EQ(*c.get('d'), 4); + EXPECT_EQ(*c.get('e'), 5); +} + +TEST(lru_cache, update_capacity1) +{ + LRUCache c(1); + c.put('a', 1); + c.put('a', 2); + EXPECT_EQ(c.get('a'), 2); +} + +TEST(lru_cache, update_first_capacity2) +{ + LRUCache c(2); + c.put('a', 1); + c.put('a', 2); + EXPECT_EQ(c.get('a'), 2); + + c.put('b', 2); + EXPECT_EQ(c.get('a'), 2); + EXPECT_EQ(c.get('b'), 2); +} + +TEST(lru_cache, update_second_capacity2) +{ + 
LRUCache c(2); + c.put('a', 1); + c.put('b', 2); + c.put('b', 3); + EXPECT_EQ(c.get('b'), 3); + EXPECT_EQ(c.get('a'), 1); +} + +TEST(lru_cache, update_evict_capacity2) +{ + LRUCache c(2); + c.put('a', 1); + c.put('b', 2); + EXPECT_EQ(c.get('a'), 1); + EXPECT_EQ(c.get('b'), 2); + + c.put('a', 3); // updates access for 'a'. + c.put('c', 4); // evicts 'b'. + EXPECT_EQ(c.get('a'), 3); + EXPECT_EQ(c.get('c'), 4); +} + +TEST(lru_cache, update_evict_capacity3) +{ + LRUCache c(3); + c.put('a', 1); + c.put('b', 2); + c.put('a', 3); // updates 'a' and its access. + c.put('c', 4); + c.put('e', 5); // evicts 'b'. + EXPECT_EQ(c.get('a'), 3); + EXPECT_EQ(c.get('c'), 4); + EXPECT_EQ(c.get('e'), 5); +} + +TEST(lru_cache, update_full_evict_capacity3) +{ + LRUCache c(3); + c.put('a', 1); + c.put('b', 2); + c.put('c', 3); // full + c.put('b', 4); // update 'b' and its access. + c.put('e', 5); // evicts 'a' → 'e'. + c.put('f', 6); // evicts 'c' → 'f'. + EXPECT_EQ(c.get('f'), 6); + EXPECT_EQ(c.get('e'), 5); + EXPECT_EQ(c.get('b'), 4); +} + +static auto get_rng() +{ + const auto seed = testing::UnitTest::GetInstance()->random_seed(); + return std::mt19937_64(static_cast(seed)); +} + +template +static std::vector shuffled_values(size_t n, auto& rng) +{ + std::vector values(n); + std::iota(values.begin(), values.end(), 0); + std::ranges::shuffle(values, rng); + return values; +} + +TEST(lru_cache, mass_put) +{ + static constexpr auto N = 100'000; + auto rng = get_rng(); + const auto values = shuffled_values(N, rng); + + LRUCache c(N); + for (const auto v : values) + c.put(v, v); + + for (const auto v : values) + EXPECT_EQ(*c.get(v), v); +} + +TEST(lru_cache, mass_update) +{ + static constexpr auto N = 100'000; + auto rng = get_rng(); + const auto values = shuffled_values(N, rng); + + LRUCache c(N); + for (const auto v : values) + c.put(v, 0); + + std::vector counts(N, 0); + for (int i = 0; i < N; ++i) + { + const auto v = static_cast(rng() % N); + c.put(v, ++counts[v]); + } + + for (const 
auto v : values) + EXPECT_EQ(*c.get(v), counts[v]); +} + +TEST(lru_cache, mass_put_over_capacity) +{ + static constexpr auto N = 100'000; + auto rng = get_rng(); + const auto values = shuffled_values(N, rng); + + LRUCache c(N / 2); + for (const auto v : values) + c.put(v, v); + + // Expect the second half of the values to be in the cache. + for (size_t i = N / 2; i < N; ++i) + EXPECT_EQ(*c.get(values[i]), values[i]); +}