diff --git a/sycl/doc/EnvironmentVariables.md b/sycl/doc/EnvironmentVariables.md index 3172bc2446aee..5ee2c40542ced 100644 --- a/sycl/doc/EnvironmentVariables.md +++ b/sycl/doc/EnvironmentVariables.md @@ -14,6 +14,7 @@ compiler and runtime. | `SYCL_CACHE_DISABLE_PERSISTENT (deprecated)` | Any(\*) | Has no effect. | | `SYCL_CACHE_PERSISTENT` | Integer | Controls persistent device compiled code cache. Turns it on if set to '1' and turns it off if set to '0'. When cache is enabled SYCL runtime will try to cache and reuse JIT-compiled binaries. Default is off. | | `SYCL_CACHE_IN_MEM` | '1' or '0' | Enable ('1') or disable ('0') in-memory caching of device compiled code. When cache is enabled SYCL runtime will try to cache and reuse JIT-compiled binaries. Default is '1'. | +| `SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD` | Positive integer | `SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD` accepts an integer that specifies the maximum size of the in-memory program cache in bytes. Eviction is performed when the cache size exceeds the threshold. The default value is 0 which means that eviction is disabled. | | `SYCL_CACHE_EVICTION_DISABLE` | Any(\*) | Switches persistent cache eviction off when the variable is set. | | `SYCL_CACHE_MAX_SIZE` | Positive integer | Persistent cache eviction is triggered once total size of cached images exceeds the value in megabytes (default - 8 192 for 8 GB). Set to 0 to disable size-based cache eviction. | | `SYCL_CACHE_THRESHOLD` | Positive integer | Persistent cache eviction threshold in days (default value is 7 for 1 week). Set to 0 for disabling time-based cache eviction. | diff --git a/sycl/source/detail/config.def b/sycl/source/detail/config.def index 9172df2a1497b..f459a2dffa50d 100644 --- a/sycl/source/detail/config.def +++ b/sycl/source/detail/config.def @@ -27,6 +27,8 @@ CONFIG(SYCL_HOST_UNIFIED_MEMORY, 1, __SYCL_HOST_UNIFIED_MEMORY) // 260 (Windows limit) - 12 (filename) - 84 (cache directory structure) CONFIG(SYCL_CACHE_DIR, 164, __SYCL_CACHE_DIR) CONFIG(SYCL_CACHE_TRACE, 4, __SYCL_CACHE_TRACE) +CONFIG(SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD, 16, + __SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD) CONFIG(SYCL_CACHE_DISABLE_PERSISTENT, 1, __SYCL_CACHE_DISABLE_PERSISTENT) CONFIG(SYCL_CACHE_PERSISTENT, 1, __SYCL_CACHE_PERSISTENT) CONFIG(SYCL_CACHE_EVICTION_DISABLE, 1, __SYCL_CACHE_EVICTION_DISABLE) diff --git a/sycl/source/detail/config.hpp b/sycl/source/detail/config.hpp index 3c1f2f6822807..ace69d0a9420e 100644 --- a/sycl/source/detail/config.hpp +++ b/sycl/source/detail/config.hpp @@ -756,6 +756,56 @@ template <> class SYCLConfig { } }; +// SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD accepts an integer that specifies +// the maximum size of the in-memory Program cache. +// Cache eviction is performed when the cache size exceeds the threshold. +// The thresholds are specified in bytes. +// The default value is "0" which means that eviction is disabled. +template <> class SYCLConfig { + using BaseT = SYCLConfigBase; + +public: + static int get() { return getCachedValue(); } + static void reset() { (void)getCachedValue(true); } + + static int getProgramCacheSize() { return getCachedValue(); } + + static bool isProgramCacheEvictionEnabled() { + return getProgramCacheSize() > 0; + } + +private: + static int getCachedValue(bool ResetCache = false) { + const auto Parser = []() { + const char *ValStr = BaseT::getRawValue(); + + // Disable eviction by default. + if (!ValStr) + return 0; + + int CacheSize = 0; + try { + CacheSize = std::stoi(ValStr); + if (CacheSize < 0) + throw INVALID_CONFIG_EXCEPTION(BaseT, "Value must be non-negative"); + } catch (...) { + std::string Msg = std::string{ + "Invalid input to SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD. Please try " + "a positive integer."}; + throw exception(make_error_code(errc::runtime), Msg); + } + + return CacheSize; + }; + + static auto EvictionThresholds = Parser(); + if (ResetCache) + EvictionThresholds = Parser(); + + return EvictionThresholds; + } +}; + #undef INVALID_CONFIG_EXCEPTION } // namespace detail diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp index f58cda059bcce..9f06d0ebcde8d 100644 --- a/sycl/source/detail/kernel_program_cache.hpp +++ b/sycl/source/detail/kernel_program_cache.hpp @@ -21,7 +21,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -36,6 +38,20 @@ namespace sycl { inline namespace _V1 { namespace detail { class context_impl; + +// During SYCL program execution SYCL runtime will create internal objects +// representing kernels and programs, it may also invoke JIT compiler to bring +// kernels in a program to executable state. Those runtime operations are quite +// expensive. To avoid redundant operations and to speed up the execution, SYCL +// runtime employs in-memory cache for kernels and programs. When a kernel is +// invoked multiple times, the runtime will fetch the kernel from the cache +// instead of creating it from scratch. +// By default, there is no upper bound on the cache size. +// When the system runs out of memory, the cache will be cleared. Alternatively, +// the cache size can be limited by setting SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD +// to a positive value. When the cache size exceeds the threshold, the least +// recently used programs, and associated kernels, will be evicted from the +// cache. class KernelProgramCache { public: /// Denotes build error data. The data is filled in from sycl::exception @@ -127,10 +143,51 @@ class KernelProgramCache { using CommonProgramKeyT = std::pair>; + // A custom hashing and equality function for ProgramCacheKeyT. + // These are used to compare and hash the keys in the cache. + struct ProgramCacheKeyHash { + std::size_t operator()(const ProgramCacheKeyT &Key) const { + std::size_t Hash = 0; + // Hash the serialized object, representing spec consts. + for (const auto &Elem : Key.first.first) + Hash ^= std::hash{}(Elem); + + // Hash the imageId. + Hash ^= std::hash{}(Key.first.second); + + // Hash the devices. + for (const auto &Elem : Key.second) + Hash ^= std::hash{}(static_cast(Elem)); + return Hash; + } + }; + + struct ProgramCacheKeyEqual { + bool operator()(const ProgramCacheKeyT &LHS, + const ProgramCacheKeyT &RHS) const { + // Check equality of SerializedObj (Spec const) + return std::equal(LHS.first.first.begin(), LHS.first.first.end(), + RHS.first.first.begin()) && + // Check equality of imageId + LHS.first.second == RHS.first.second && + // Check equality of devices + std::equal(LHS.second.begin(), LHS.second.end(), + RHS.second.begin(), RHS.second.end()); + } + }; + struct ProgramCache { ::boost::unordered_map Cache; ::boost::unordered_multimap KeyMap; + // Mapping between a UR program and its size. + std::unordered_map ProgramSizeMap; + + size_t ProgramCacheSizeInBytes = 0; + inline size_t GetProgramCacheSizeInBytes() const noexcept { + return ProgramCacheSizeInBytes; + } + // Returns number of entries in the cache. size_t size() const noexcept { return Cache.size(); } }; @@ -184,6 +241,62 @@ class KernelProgramCache { using KernelFastCacheT = ::boost::unordered_flat_map; + // DS to hold data and functions related to Program cache eviction. + struct EvictionList { + private: + // Linked list of cache entries to be evicted in case of cache overflow. + std::list MProgramEvictionList; + + // Mapping between program handle and the iterator to the eviction list. + std::unordered_map::iterator, + ProgramCacheKeyHash, ProgramCacheKeyEqual> + MProgramToEvictionListMap; + + public: + std::list &getProgramEvictionList() { + return MProgramEvictionList; + } + + void clear() { + MProgramEvictionList.clear(); + MProgramToEvictionListMap.clear(); + } + + void emplaceBack(const ProgramCacheKeyT &CacheKey) { + MProgramEvictionList.emplace_back(CacheKey); + + // In std::list, the iterators are not invalidated when elements are + // added/removed/moved to the list. So, we can safely store the iterators. + MProgramToEvictionListMap[CacheKey] = + std::prev(MProgramEvictionList.end()); + traceProgram("Program added to the end of eviction list.", CacheKey); + } + + // This function is called on the hot path, whenever a kernel/program + // is accessed. So, it should be very fast. + void moveToEnd(const ProgramCacheKeyT &CacheKey) { + auto It = MProgramToEvictionListMap.find(CacheKey); + if (It != MProgramToEvictionListMap.end()) { + MProgramEvictionList.splice(MProgramEvictionList.end(), + MProgramEvictionList, It->second); + traceProgram("Program moved to the end of eviction list.", CacheKey); + } + // else: This can happen if concurrently the program is removed from + // eviction list by another thread. + } + + bool empty() { return MProgramEvictionList.empty(); } + + size_t size() { return MProgramEvictionList.size(); } + + void popFront() { + if (!MProgramEvictionList.empty()) { + MProgramToEvictionListMap.erase(MProgramEvictionList.front()); + MProgramEvictionList.pop_front(); + } + } + }; + ~KernelProgramCache() = default; void setContextPtr(const ContextPtr &AContext) { MParentContext = AContext; } @@ -197,12 +310,24 @@ class KernelProgramCache { int ImageId = CacheKey.first.second; std::stringstream DeviceList; + std::vector SerializedObjVec = CacheKey.first.first; + + // Convert spec constants to string. Spec constants are stored as + // ASCII values, so we need need to convert them to int and then to + // string. + std::string SerializedObjString; + SerializedObjString.reserve(SerializedObjVec.size() * sizeof(size_t)); + for (unsigned char c : SerializedObjVec) + SerializedObjString += std::to_string((int)c) + ","; + for (const auto &Device : CacheKey.second) DeviceList << "0x" << std::setbase(16) << reinterpret_cast(Device) << ","; std::string Identifier = "[Key:{imageId = " + std::to_string(ImageId) + - ",urDevice = " + DeviceList.str() + "}]: "; + ",urDevice = " + DeviceList.str() + + ", serializedObj = " + SerializedObjString + + "}]: "; std::cerr << "[In-Memory Cache][Thread Id:" << std::this_thread::get_id() << "][Program Cache]" << Identifier << Msg << std::endl; @@ -232,6 +357,10 @@ class KernelProgramCache { return {MKernelsPerProgramCache, MKernelsPerProgramCacheMutex}; } + Locked acquireEvictionList() { + return {MEvictionList, MProgramEvictionListMutex}; + } + std::pair getOrInsertProgram(const ProgramCacheKeyT &CacheKey) { auto LockedCache = acquireCachedPrograms(); @@ -268,8 +397,7 @@ class KernelProgramCache { std::make_pair(CacheKey.first.second, CacheKey.second); ProgCache.KeyMap.emplace(CommonKey, CacheKey); traceProgram("Program inserted.", CacheKey); - } else - traceProgram("Program fetched.", CacheKey); + } return DidInsert; } @@ -300,6 +428,23 @@ class KernelProgramCache { template void saveKernel(KeyT &&CacheKey, ValT &&CacheVal) { + + if (SYCLConfig:: + isProgramCacheEvictionEnabled()) { + + ur_program_handle_t Program = std::get<3>(CacheVal); + // Save kernel in fast cache only if the corresponding program is also + // in the cache. + auto LockedCache = acquireCachedPrograms(); + auto &ProgCache = LockedCache.get(); + if (ProgCache.ProgramSizeMap.find(Program) == + ProgCache.ProgramSizeMap.end()) + return; + + // Save reference between the program and the fast cache key. + MProgramToKernelFastCacheKeyMap[Program].emplace_back(CacheKey); + } + std::unique_lock Lock(MKernelFastCacheMutex); // if no insertion took place, thus some other thread has already inserted // smth in the cache @@ -307,6 +452,167 @@ class KernelProgramCache { MKernelFastCache.emplace(CacheKey, CacheVal); } + // Evict programs from cache to free up space. + void evictPrograms(size_t DesiredCacheSize, size_t CurrentCacheSize) { + + // Figure out how many programs from the beginning we need to evict. + if (CurrentCacheSize < DesiredCacheSize || MCachedPrograms.Cache.empty()) + return; + + // Evict programs from the beginning of the cache. + { + std::lock_guard Lock(MProgramEvictionListMutex); + auto &ProgramEvictionList = MEvictionList.getProgramEvictionList(); + size_t CurrCacheSize = MCachedPrograms.ProgramCacheSizeInBytes; + + // Traverse the eviction list and remove the LRU programs. + // The LRU programs will be at the front of the list. + while (CurrCacheSize > DesiredCacheSize && !MEvictionList.empty()) { + ProgramCacheKeyT CacheKey = ProgramEvictionList.front(); + auto LockedCache = acquireCachedPrograms(); + auto &ProgCache = LockedCache.get(); + auto It = ProgCache.Cache.find(CacheKey); + + if (It != ProgCache.Cache.end()) { + // We are about to remove this program now. + // (1) Remove it from KernelPerProgram cache. + // (2) Remove corresponding entries from KernelFastCache. + // (3) Remove it from ProgramCache KeyMap. + // (4) Remove it from the ProgramCache. + // (5) Remove it from ProgramSizeMap. + // (6) Update the cache size. + + // Remove entry from the KernelsPerProgram cache. + ur_program_handle_t NativePrg = It->second->Val; + { + auto LockedCacheKP = acquireKernelsPerProgramCache(); + // List kernels that are to be removed from the cache, if tracing is + // enabled. + if (SYCLConfig::isTraceInMemCache()) { + for (const auto &Kernel : LockedCacheKP.get()[NativePrg]) + traceKernel("Kernel evicted.", Kernel.first); + } + LockedCacheKP.get().erase(NativePrg); + } + + // Remove corresponding entries from KernelFastCache. + auto FastCacheKeyItr = + MProgramToKernelFastCacheKeyMap.find(NativePrg); + if (FastCacheKeyItr != MProgramToKernelFastCacheKeyMap.end()) { + for (const auto &FastCacheKey : FastCacheKeyItr->second) { + std::unique_lock Lock(MKernelFastCacheMutex); + MKernelFastCache.erase(FastCacheKey); + traceKernel("Kernel evicted.", std::get<2>(FastCacheKey), true); + } + MProgramToKernelFastCacheKeyMap.erase(FastCacheKeyItr); + } + + // Remove entry from ProgramCache KeyMap. + CommonProgramKeyT CommonKey = + std::make_pair(CacheKey.first.second, CacheKey.second); + // Since KeyMap is a multi-map, we need to iterate over all entries + // with this CommonKey and remove those that match the CacheKey. + auto KeyMapItrRange = LockedCache.get().KeyMap.equal_range(CommonKey); + for (auto KeyMapItr = KeyMapItrRange.first; + KeyMapItr != KeyMapItrRange.second; ++KeyMapItr) { + if (KeyMapItr->second == CacheKey) { + LockedCache.get().KeyMap.erase(KeyMapItr); + break; + } + } + + // Get size of the program. + size_t ProgramSize = MCachedPrograms.ProgramSizeMap[It->second->Val]; + // Evict program from the cache. + ProgCache.Cache.erase(It); + // Remove program size from the cache size. + MCachedPrograms.ProgramCacheSizeInBytes -= ProgramSize; + MCachedPrograms.ProgramSizeMap.erase(NativePrg); + + traceProgram("Program evicted.", CacheKey); + } else + // This should never happen. + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + "Program not found in the cache."); + + CurrCacheSize = MCachedPrograms.ProgramCacheSizeInBytes; + // Remove the program from the eviction list. + MEvictionList.popFront(); + } + } + } + + // Register that a program has been fetched from the cache. + // If it is the first time the program is fetched, add it to the eviction + // list. + void registerProgramFetch(const ProgramCacheKeyT &CacheKey, + const ur_program_handle_t &Program, + const bool IsBuilt) { + + size_t ProgramCacheEvictionThreshold = + SYCLConfig::getProgramCacheSize(); + + // No need to populate the eviction list if eviction is disabled. + if (ProgramCacheEvictionThreshold == 0) + return; + + // If the program is not in the cache, add it to the cache. + if (IsBuilt) { + // This is the first time we are adding this entry. Add it to the end of + // eviction list. + { + std::lock_guard Lock(MProgramEvictionListMutex); + MEvictionList.emplaceBack(CacheKey); + } + + // Store size of the program and check if we need to evict some entries. + // Get Size of the program. + size_t ProgramSize = 0; + auto Adapter = getAdapter(); + + try { + // Get number of devices this program was built for. + unsigned int DeviceNum = 0; + Adapter->call( + Program, UR_PROGRAM_INFO_NUM_DEVICES, sizeof(DeviceNum), &DeviceNum, + nullptr); + + // Get binary sizes for each device. + std::vector BinarySizes(DeviceNum); + Adapter->call( + Program, UR_PROGRAM_INFO_BINARY_SIZES, + sizeof(size_t) * BinarySizes.size(), BinarySizes.data(), nullptr); + + // Sum up binary sizes. + ProgramSize = + std::accumulate(BinarySizes.begin(), BinarySizes.end(), 0); + } catch (const exception &Ex) { + std::cerr << "Failed to get program size: " << Ex.what() << std::endl; + std::rethrow_exception(std::current_exception()); + } + // Store program size in the cache. + size_t CurrCacheSize = 0; + { + std::lock_guard Lock(MProgramCacheMutex); + MCachedPrograms.ProgramSizeMap[Program] = ProgramSize; + MCachedPrograms.ProgramCacheSizeInBytes += ProgramSize; + CurrCacheSize = MCachedPrograms.ProgramCacheSizeInBytes; + } + + // Evict programs if the cache size exceeds the threshold. + if (CurrCacheSize > ProgramCacheEvictionThreshold) + evictPrograms(ProgramCacheEvictionThreshold, CurrCacheSize); + } + // If the program is already in the cache, move it to the end of the list. + // Since we are following LRU eviction policy, we need to move the program + // to the end of the list. Items in the front of the list are the least + // recently This code path is "hot" and should be very fast. + else { + std::lock_guard Lock(MProgramEvictionListMutex); + MEvictionList.moveToEnd(CacheKey); + } + } + /// Clears cache state. /// /// This member function should only be used in unit tests. @@ -317,6 +623,11 @@ class KernelProgramCache { MCachedPrograms = ProgramCache{}; MKernelsPerProgramCache = KernelCacheT{}; MKernelFastCache = KernelFastCacheT{}; + MProgramToKernelFastCacheKeyMap.clear(); + + // Clear the eviction lists and its mutexes. + std::lock_guard EvictionListLock(MProgramEvictionListMutex); + MEvictionList.clear(); } /// Try to fetch entity (kernel or program) from cache. If there is no such @@ -341,8 +652,10 @@ class KernelProgramCache { /// /// \return a pointer to cached build result, return value must not be /// nullptr. - template - auto getOrBuild(GetCachedBuildFT &&GetCachedBuild, BuildFT &&Build) { + template + auto getOrBuild(GetCachedBuildFT &&GetCachedBuild, BuildFT &&Build, + EvictFT &&EvictFunc = nullptr) { using BuildState = KernelProgramCache::BuildState; constexpr size_t MaxAttempts = 2; for (size_t AttemptCounter = 0;; ++AttemptCounter) { @@ -356,8 +669,11 @@ class KernelProgramCache { BuildState NewState = BuildResult->waitUntilTransition(); // Build succeeded. - if (NewState == BuildState::BS_Done) + if (NewState == BuildState::BS_Done) { + if constexpr (!std::is_same_v) + EvictFunc(BuildResult->Val, /*IsBuilt=*/false); return BuildResult; + } // Build failed, or this is the last attempt. if (NewState == BuildState::BS_Failed || @@ -381,6 +697,9 @@ class KernelProgramCache { try { BuildResult->Val = Build(); + if constexpr (!std::is_same_v) + EvictFunc(BuildResult->Val, /*IsBuilt=*/true); + BuildResult->updateAndNotify(BuildState::BS_Done); return BuildResult; } catch (const exception &Ex) { @@ -414,6 +733,16 @@ class KernelProgramCache { std::mutex MKernelFastCacheMutex; KernelFastCacheT MKernelFastCache; + + // Map between fast kernel cache keys and program handle. + // MKernelFastCacheMutex will be used for synchronization. + std::unordered_map> + MProgramToKernelFastCacheKeyMap; + + EvictionList MEvictionList; + // Mutexes that will be used when accessing the eviction lists. + std::mutex MProgramEvictionListMutex; + friend class ::MockKernelProgramCache; const AdapterPtr &getAdapter(); diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 8f13c0745ad21..dfc5d019051a9 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -925,7 +925,13 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( if (!SYCLConfig::get()) return BuildF(); - auto BuildResult = Cache.getOrBuild(GetCachedBuildF, BuildF); + auto EvictFunc = [&Cache, &CacheKey](ur_program_handle_t Program, + bool isBuilt) { + return Cache.registerProgramFetch(CacheKey, Program, isBuilt); + }; + + auto BuildResult = + Cache.getOrBuild(GetCachedBuildF, BuildF, EvictFunc); // getOrBuild is not supposed to return nullptr assert(BuildResult != nullptr && "Invalid build result"); @@ -939,10 +945,12 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( // update it here and re-use that lambda. CacheKey.first.second = BImg->getImageID(); bool DidInsert = Cache.insertBuiltProgram(CacheKey, ResProgram); - if (DidInsert) { + + // Add to the eviction list. + Cache.registerProgramFetch(CacheKey, ResProgram, DidInsert); + if (DidInsert) // For every cached copy of the program, we need to increment its refcount Adapter->call(ResProgram); - } } // If caching is enabled, one copy of the program handle will be @@ -2699,7 +2707,13 @@ device_image_plain ProgramManager::build(const device_image_plain &DeviceImage, return Cache.getOrInsertProgram(CacheKey); }; - auto BuildResult = Cache.getOrBuild(GetCachedBuildF, BuildF); + auto EvictFunc = [&Cache, &CacheKey](ur_program_handle_t Program, + bool isBuilt) { + return Cache.registerProgramFetch(CacheKey, Program, isBuilt); + }; + + auto BuildResult = + Cache.getOrBuild(GetCachedBuildF, BuildF, EvictFunc); // getOrBuild is not supposed to return nullptr assert(BuildResult != nullptr && "Invalid build result"); @@ -2728,7 +2742,7 @@ device_image_plain ProgramManager::build(const device_image_plain &DeviceImage, } // Change device in the cache key to reduce copying of spec const data. CacheKey.second = Subset; - Cache.getOrBuild(GetCachedBuildF, CacheSubsets); + Cache.getOrBuild(GetCachedBuildF, CacheSubsets, EvictFunc); // getOrBuild is not supposed to return nullptr assert(BuildResult != nullptr && "Invalid build result"); } diff --git a/sycl/unittests/assert/assert.cpp b/sycl/unittests/assert/assert.cpp index b45996238358f..e11184d3a24d2 100644 --- a/sycl/unittests/assert/assert.cpp +++ b/sycl/unittests/assert/assert.cpp @@ -319,6 +319,18 @@ static ur_result_t redefinedProgramGetInfo(void *pParams) { return UR_RESULT_SUCCESS; } + // Required if program cache eviction is enabled. + if (UR_PROGRAM_INFO_BINARY_SIZES == *params.ppropName) { + size_t BinarySize = 1; + + if (*params.ppPropValue) + memcpy(*params.ppPropValue, &BinarySize, sizeof(size_t)); + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(size_t); + + return UR_RESULT_SUCCESS; + } + return UR_RESULT_ERROR_UNKNOWN; } diff --git a/sycl/unittests/config/ConfigTests.cpp b/sycl/unittests/config/ConfigTests.cpp index 3022ccbd52e65..756a340c8f82d 100644 --- a/sycl/unittests/config/ConfigTests.cpp +++ b/sycl/unittests/config/ConfigTests.cpp @@ -324,3 +324,66 @@ TEST(ConfigTests, CheckSyclCacheTraceTest) { sycl::detail::SYCLConfig::reset(); TestConfig(0, 0, 0, 0); } + +// SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD accepts an integer that specifies +// the maximum size of the in-memory Program cache. +// Cache eviction is performed when the cache size exceeds the threshold. +// The thresholds are specified in bytes. +// The default value is "0" which means that eviction is disabled. +TEST(ConfigTests, CheckSyclCacheEvictionThresholdTest) { + + using InMemEvicType = + sycl::detail::SYCLConfig; + + // Lambda to test parsing of SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD. + auto TestConfig = [](int expectedProgramCacheSize) { + EXPECT_EQ(expectedProgramCacheSize, InMemEvicType::getProgramCacheSize()); + EXPECT_EQ(expectedProgramCacheSize > 0, + InMemEvicType::isProgramCacheEvictionEnabled()); + }; + + // Lambda to set SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD. + auto SetSyclInMemCacheEvictionThresholdEnv = [](const char *value) { +#ifdef _WIN32 + _putenv_s("SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD", value); +#else + setenv("SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD", value, 1); +#endif + }; + + // Lambda to test invalid inputs. An exception should be thrown + // when parsing invalid values. + auto TestInvalidValues = [&](const char *value, const char *errMsg) { + SetSyclInMemCacheEvictionThresholdEnv(value); + try { + InMemEvicType::reset(); + TestConfig(0); + FAIL() << errMsg; + } catch (...) { + } + }; + + // Test eviction threshold with zero. + SetSyclInMemCacheEvictionThresholdEnv("0"); + sycl::detail::readConfig(true); + TestConfig(0); + + // Test invalid values. + TestInvalidValues("-1", "Should throw exception for negative value"); + TestInvalidValues("a", "Should throw exception for non-integer value"); + + // Test valid values. + SetSyclInMemCacheEvictionThresholdEnv("1024"); + InMemEvicType::reset(); + TestConfig(1024); + + // When SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD is not set, it should default to + // 0:0:0. +#ifdef _WIN32 + _putenv_s("SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD", ""); +#else + unsetenv("SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD"); +#endif + InMemEvicType::reset(); + TestConfig(0); +} diff --git a/sycl/unittests/kernel-and-program/CMakeLists.txt b/sycl/unittests/kernel-and-program/CMakeLists.txt index 8736f6f60a76a..0d06d2fc29aa0 100644 --- a/sycl/unittests/kernel-and-program/CMakeLists.txt +++ b/sycl/unittests/kernel-and-program/CMakeLists.txt @@ -7,5 +7,6 @@ add_sycl_unittest(KernelAndProgramTests OBJECT PersistentDeviceCodeCache.cpp KernelBuildOptions.cpp OutOfResources.cpp + InMemCacheEviction.cpp ) target_compile_definitions(KernelAndProgramTests PRIVATE -D__SYCL_INTERNAL_API) diff --git a/sycl/unittests/kernel-and-program/InMemCacheEviction.cpp b/sycl/unittests/kernel-and-program/InMemCacheEviction.cpp new file mode 100644 index 0000000000000..70c121053cee9 --- /dev/null +++ b/sycl/unittests/kernel-and-program/InMemCacheEviction.cpp @@ -0,0 +1,225 @@ +//==----- InMemCacheEviction.cpp --- In-memory cache eviction tests -------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file contains tests covering eviction in in-memory program cache. + +#define SYCL2020_DISABLE_DEPRECATION_WARNINGS + +#include "../thread_safety/ThreadUtils.h" +#include "detail/context_impl.hpp" +#include "detail/kernel_program_cache.hpp" +#include +#include +#include +#include + +#include + +#include + +using namespace sycl; + +class Kernel1; +class Kernel2; +class Kernel3; + +MOCK_INTEGRATION_HEADER(Kernel1) +MOCK_INTEGRATION_HEADER(Kernel2) +MOCK_INTEGRATION_HEADER(Kernel3) + +static sycl::unittest::MockDeviceImage Img[] = { + sycl::unittest::generateDefaultImage({"Kernel1"}), + sycl::unittest::generateDefaultImage({"Kernel2"}), + sycl::unittest::generateDefaultImage({"Kernel3"})}; + +static sycl::unittest::MockDeviceImageArray<3> ImgArray{Img}; + +// Number of times urProgramCreateWithIL is called. This is used to check +// if the program is created or fetched from the cache. +static int NumProgramBuild = 0; + +constexpr int ProgramSize = 10000; + +static ur_result_t redefinedProgramCreateWithIL(void *) { + ++NumProgramBuild; + return UR_RESULT_SUCCESS; +} + +static ur_result_t redefinedProgramGetInfoAfter(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_PROGRAM_INFO_NUM_DEVICES) { + auto value = reinterpret_cast(*params.ppPropValue); + *value = 1; + } + + if (*params.ppropName == UR_PROGRAM_INFO_BINARY_SIZES) { + auto value = reinterpret_cast(*params.ppPropValue); + value[0] = ProgramSize; + } + + if (*params.ppropName == UR_PROGRAM_INFO_BINARIES) { + auto value = reinterpret_cast(*params.ppPropValue); + value[0] = 0; + } + + return UR_RESULT_SUCCESS; +} + +// Function to set SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD. +static void setCacheEvictionEnv(const char *value) { +#ifdef _WIN32 + _putenv_s("SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD", value); +#else + if (value) + setenv("SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD", value, 1); + else + (void)unsetenv("SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD"); +#endif + + sycl::detail::readConfig(true); + sycl::detail::SYCLConfig< + sycl::detail::SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD>::reset(); +} + +// Function to check number of entries in the cache and eviction list. +static inline void +CheckNumberOfEntriesInCacheAndEvictionList(detail::context_impl &CtxImpl, + size_t ExpectedNumEntries) { + auto &KPCache = CtxImpl.getKernelProgramCache(); + EXPECT_EQ(KPCache.acquireCachedPrograms().get().size(), ExpectedNumEntries) + << "Unexpected number of entries in the cache"; + auto EvcList = KPCache.acquireEvictionList(); + EXPECT_EQ(EvcList.get().size(), ExpectedNumEntries) + << "Unexpected number of entries in the eviction list"; +} + +class InMemCacheEvictionTests : public ::testing::Test { +protected: + void TearDown() override { setCacheEvictionEnv(""); } +}; + +TEST(InMemCacheEvictionTests, TestBasicEvictionAndLRU) { + NumProgramBuild = 0; + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_before_callback("urProgramCreateWithIL", + &redefinedProgramCreateWithIL); + mock::getCallbacks().set_after_callback("urProgramGetInfo", + &redefinedProgramGetInfoAfter); + + sycl::platform Plt{sycl::platform()}; + sycl::context Ctx{Plt}; + auto CtxImpl = detail::getSyclObjImpl(Ctx); + queue q(Ctx, default_selector_v); + + // One program is of 10000 bytes, so 20005 eviction threshold can + // accommodate two programs. + setCacheEvictionEnv("20005"); + + // Cache is empty, so one urProgramCreateWithIL call. + q.single_task([] {}); + EXPECT_EQ(NumProgramBuild, 1); + CheckNumberOfEntriesInCacheAndEvictionList(*CtxImpl, 1); + + q.single_task([] {}); + EXPECT_EQ(NumProgramBuild, 2); + CheckNumberOfEntriesInCacheAndEvictionList(*CtxImpl, 2); + + // Move first program to end of eviction list. + q.single_task([] {}); + EXPECT_EQ(NumProgramBuild, 2); + + // Calling Kernel3, Kernel2, and Kernel1 in a cyclic manner to + // verify LRU's working. + + // Kernel2's program should have been evicted. + q.single_task([] {}); + EXPECT_EQ(NumProgramBuild, 3); + CheckNumberOfEntriesInCacheAndEvictionList(*CtxImpl, 2); + + // Calling Kernel2 again should trigger urProgramCreateWithIL and + // should evict Kernel1's program. + q.single_task([] {}); + EXPECT_EQ(NumProgramBuild, 3); + CheckNumberOfEntriesInCacheAndEvictionList(*CtxImpl, 2); + + // Calling Kernel1 again should trigger urProgramCreateWithIL and + // should evict Kernel3's program. + q.single_task([] {}); + EXPECT_EQ(NumProgramBuild, 4); + CheckNumberOfEntriesInCacheAndEvictionList(*CtxImpl, 2); +} + +// Test to verify eviction using concurrent kernel invocation. +TEST(InMemCacheEvictionTests, TestConcurrentEvictionSameQueue) { + NumProgramBuild = 0; + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_before_callback("urProgramCreateWithIL", + &redefinedProgramCreateWithIL); + mock::getCallbacks().set_after_callback("urProgramGetInfo", + &redefinedProgramGetInfoAfter); + + sycl::platform Plt{sycl::platform()}; + context Ctx{Plt}; + auto CtxImpl = detail::getSyclObjImpl(Ctx); + queue q(Ctx, default_selector_v); + + // One program is of 10000 bytes, so 20005 eviction threshold can + // accommodate two programs. + setCacheEvictionEnv("20005"); + + constexpr size_t ThreadCount = 200; + Barrier barrier(ThreadCount); + { + auto ConcurrentInvokeKernels = [&](std::size_t threadId) { + barrier.wait(); + q.single_task([] {}); + q.single_task([] {}); + q.single_task([] {}); + }; + + ThreadPool MPool(ThreadCount, ConcurrentInvokeKernels); + } + q.wait_and_throw(); + + CheckNumberOfEntriesInCacheAndEvictionList(*CtxImpl, 2); +} + +// Test to verify eviction using concurrent kernel invocation when +// cache size is very less so as to trigger immediate eviction. +TEST(InMemCacheEvictionTests, TestConcurrentEvictionSmallCache) { + NumProgramBuild = 0; + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_before_callback("urProgramCreateWithIL", + &redefinedProgramCreateWithIL); + mock::getCallbacks().set_after_callback("urProgramGetInfo", + &redefinedProgramGetInfoAfter); + + context Ctx{platform()}; + auto CtxImpl = detail::getSyclObjImpl(Ctx); + queue q(Ctx, default_selector_v); + + // One program is of 10000 bytes, so 100 eviction threshold will + // trigger immediate eviction. + setCacheEvictionEnv("100"); + + // Fetch the same kernel concurrently from multiple threads. + // This should cause some threads to insert a program and other + // threads to evict the same program. + constexpr size_t ThreadCount = 300; + Barrier barrier(ThreadCount); + { + auto ConcurrentInvokeKernels = [&](std::size_t threadId) { + barrier.wait(); + q.single_task([] {}); + }; + + ThreadPool MPool(ThreadCount, ConcurrentInvokeKernels); + } + q.wait_and_throw(); + + CheckNumberOfEntriesInCacheAndEvictionList(*CtxImpl, 0); +}