From 549001dd224e41229339dee083dd1a8a1cf07063 Mon Sep 17 00:00:00 2001
From: Arnaud DUDES <155963334+arng40@users.noreply.github.com>
Date: Tue, 2 Jul 2024 22:56:23 +0200
Subject: [PATCH] Umpire logs refactor - table output + adding percentages (#3052)

---
 src/coreComponents/common/CMakeLists.txt      |  4 +
 src/coreComponents/common/MemoryInfos.cpp     | 68 ++++++++++++++
 src/coreComponents/common/MemoryInfos.hpp     | 72 +++++++++++++++
 .../common/initializeEnvironment.cpp          | 91 ++++++++++++++-----
 4 files changed, 214 insertions(+), 21 deletions(-)
 create mode 100644 src/coreComponents/common/MemoryInfos.cpp
 create mode 100644 src/coreComponents/common/MemoryInfos.hpp

diff --git a/src/coreComponents/common/CMakeLists.txt b/src/coreComponents/common/CMakeLists.txt
index 5600a8fae85..1d829c19f2d 100644
--- a/src/coreComponents/common/CMakeLists.txt
+++ b/src/coreComponents/common/CMakeLists.txt
@@ -10,6 +10,7 @@ set( common_headers
      Format.hpp
      GEOS_RAJA_Interface.hpp
      GeosxMacros.hpp
+     MemoryInfos.hpp
      Logger.hpp
      MpiWrapper.hpp
      Path.hpp
@@ -40,6 +41,7 @@ endif( )
 set( common_sources
      BufferAllocator.cpp
      DataTypes.cpp
+     MemoryInfos.cpp
      Logger.cpp
      MpiWrapper.cpp
      Path.cpp
@@ -93,6 +95,8 @@ if( ENABLE_CALIPER )
   endif()
 endif()
 
+set( dependencyList ${dependencyList} fileIO codingUtilities )
+
 blt_add_library( NAME       common
                  SOURCES    ${common_sources}
                  HEADERS    ${common_headers}
diff --git a/src/coreComponents/common/MemoryInfos.cpp b/src/coreComponents/common/MemoryInfos.cpp
new file mode 100644
index 00000000000..98b0093c82f
--- /dev/null
+++ b/src/coreComponents/common/MemoryInfos.cpp
@@ -0,0 +1,68 @@
+/*
+ * ------------------------------------------------------------------------------------------------------------
+ * SPDX-License-Identifier: LGPL-2.1-only
+ *
+ * Copyright (c) 2018-2020 Lawrence Livermore National Security LLC
+ * Copyright (c) 2018-2020 The Board of Trustees of the Leland Stanford Junior University
+ * Copyright (c) 2018-2020 TotalEnergies
+ * Copyright (c) 2019-     GEOSX Contributors
+ * All rights reserved
+ *
+ * See top level LICENSE, COPYRIGHT, CONTRIBUTORS, NOTICE, and ACKNOWLEDGEMENTS files for details.
+ * ------------------------------------------------------------------------------------------------------------
+ */
+
+#include "MemoryInfos.hpp"
+
+namespace geos
+{
+MemoryInfos::MemoryInfos( umpire::MemoryResourceTraits::resource_type resourceType ):
+  m_totalMemory( 0 ),
+  m_availableMemory( 0 ),
+  m_physicalMemoryHandled( true )
+{
+  switch( resourceType )
+  {
+    case umpire::MemoryResourceTraits::resource_type::host:
+    case umpire::MemoryResourceTraits::resource_type::pinned:
+      #if defined( _SC_PHYS_PAGES ) && defined( _SC_AVPHYS_PAGES ) && defined( _SC_PAGESIZE )
+      m_totalMemory = sysconf( _SC_PHYS_PAGES ) * sysconf( _SC_PAGESIZE );
+      m_availableMemory = sysconf( _SC_AVPHYS_PAGES ) * sysconf( _SC_PAGESIZE );
+      #else
+      GEOS_WARNING( "Unknown host physical memory size getter for this compiler." );
+      m_physicalMemoryHandled = false;
+      #endif
+      break;
+    case umpire::MemoryResourceTraits::resource_type::device:
+    case umpire::MemoryResourceTraits::resource_type::device_const:
+    case umpire::MemoryResourceTraits::resource_type::um:
+      #if defined( GEOS_USE_CUDA )
+      cudaMemGetInfo( &m_availableMemory, &m_totalMemory );
+      #else
+      GEOS_WARNING( "Unknown device physical memory size getter for this compiler." );
+      m_physicalMemoryHandled = false;
+      #endif
+      break;
+    default:
+      GEOS_WARNING( "Physical memory lookup not implemented" );
+      m_physicalMemoryHandled = false;
+      break;
+  }
+}
+
+size_t MemoryInfos::getTotalMemory() const
+{
+  return m_totalMemory;
+}
+
+size_t MemoryInfos::getAvailableMemory() const
+{
+  return m_availableMemory;
+}
+
+bool MemoryInfos::isPhysicalMemoryHandled() const
+{
+  return m_physicalMemoryHandled;
+}
+
+}
diff --git a/src/coreComponents/common/MemoryInfos.hpp b/src/coreComponents/common/MemoryInfos.hpp
new file mode 100644
index 00000000000..b2a22431d09
--- /dev/null
+++ b/src/coreComponents/common/MemoryInfos.hpp
@@ -0,0 +1,72 @@
+/*
+ * ------------------------------------------------------------------------------------------------------------
+ * SPDX-License-Identifier: LGPL-2.1-only
+ *
+ * Copyright (c) 2018-2020 Lawrence Livermore National Security LLC
+ * Copyright (c) 2018-2020 The Board of Trustees of the Leland Stanford Junior University
+ * Copyright (c) 2018-2020 TotalEnergies
+ * Copyright (c) 2019-     GEOSX Contributors
+ * All rights reserved
+ *
+ * See top level LICENSE, COPYRIGHT, CONTRIBUTORS, NOTICE, and ACKNOWLEDGEMENTS files for details.
+ * ------------------------------------------------------------------------------------------------------------
+ */
+
+#ifndef GEOS_COMMON_MemoryInfos_HPP_
+#define GEOS_COMMON_MemoryInfos_HPP_
+
+#include "umpire/util/MemoryResourceTraits.hpp"
+#include "common/Logger.hpp"
+#include <unistd.h>
+#include <iostream>
+#if defined( GEOS_USE_CUDA )
+#include <cuda.h>
+#endif
+
+namespace geos
+{
+
+/**
+ * @class MemoryInfos
+ * @brief Class to fetch and store memory information for different resource types.
+ */
+class MemoryInfos
+{
+public:
+
+  /**
+   * @brief Constructor for MemoryInfos.
+   * @param resourceType The type of memory resource.
+   */
+  MemoryInfos( umpire::MemoryResourceTraits::resource_type resourceType );
+
+  /**
+   * @brief Get the total memory available for the resource type.
+   * @return Total memory in bytes.
+   */
+  size_t getTotalMemory() const;
+
+  /**
+   * @brief Get the available memory for the resource type.
+   * @return Available memory in bytes.
+   */
+  size_t getAvailableMemory() const;
+
+  /**
+   * @brief Check if physical memory is handled.
+   * @return True if physical memory is handled, false otherwise.
+   */
+  bool isPhysicalMemoryHandled() const;
+private:
+
+  /// Total memory of the resource, in bytes (0 until a lookup succeeds).
+  size_t m_totalMemory;
+  /// Available memory of the resource, in bytes (0 until a lookup succeeds).
+  size_t m_availableMemory;
+  /// Flag indicating if a physical memory lookup exists for the resource type.
+  bool m_physicalMemoryHandled;
+};
+
+}
+
+#endif
diff --git a/src/coreComponents/common/initializeEnvironment.cpp b/src/coreComponents/common/initializeEnvironment.cpp
index 356b4c112f0..a86cbf15af9 100644
--- a/src/coreComponents/common/initializeEnvironment.cpp
+++ b/src/coreComponents/common/initializeEnvironment.cpp
@@ -17,9 +17,18 @@
 #include "TimingMacros.hpp"
 #include "Path.hpp"
 #include "LvArray/src/system.hpp"
-
+#include "fileIO/Table/TableLayout.hpp"
+#include "fileIO/Table/TableData.hpp"
+#include "fileIO/Table/TableFormatter.hpp"
+#include "common/LifoStorageCommon.hpp"
+#include "common/MemoryInfos.hpp"
+#include <umpire/TypedAllocator.hpp>
 // TPL includes
 #include <umpire/ResourceManager.hpp>
+#include <umpire/Allocator.hpp>
+#include <umpire/strategy/AllocationStrategy.hpp>
+#include "umpire/util/MemoryResourceTraits.hpp"
+#include "umpire/util/Platform.hpp"
 
 #if defined( GEOSX_USE_CALIPER )
 #include <caliper/cali-manager.h>
@@ -46,13 +55,11 @@
 #if defined( GEOS_USE_HIP )
 #include <hip/hip_runtime.h>
 #endif
-
 #include <cfenv>
 
 namespace geos
 {
-
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 void setupLogger()
 {
@@ -248,7 +255,9 @@ void finalizeCaliper()
 static void addUmpireHighWaterMarks()
 {
   umpire::ResourceManager & rm = umpire::ResourceManager::getInstance();
-
+  integer size;
+  MPI_Comm_size( MPI_COMM_GEOSX, &size ); // same communicator as the reductions below
+  size_t const nbRank = static_cast< std::size_t >( size );
   // Get a list of all the allocators and sort it so that it's in the same order on each rank.
   std::vector< string > allocatorNames = rm.getAllocatorNames();
   std::sort( allocatorNames.begin(), allocatorNames.end() );
@@ -264,9 +273,9 @@ static void addUmpireHighWaterMarks()
   }
 
   // Loop over the allocators.
-  constexpr int MAX_NAME_LENGTH = 100;
-  char allocatorNameBuffer[ MAX_NAME_LENGTH + 1 ];
-  char allocatorNameMinCharsBuffer[ MAX_NAME_LENGTH + 1 ];
+  constexpr unsigned MAX_NAME_LENGTH = 100;
+
+  TableData tableData;
   for( string const & allocatorName : allocatorNames )
   {
     // Skip umpire internal allocators.
@@ -274,33 +283,74 @@ static void addUmpireHighWaterMarks()
       continue;
 
     GEOS_ERROR_IF_GT( allocatorName.size(), MAX_NAME_LENGTH );
+    string allocatorNameFixedSize = allocatorName;
+    allocatorNameFixedSize.resize( MAX_NAME_LENGTH, '\0' );
+    string allocatorNameMinChars = string( MAX_NAME_LENGTH, '\0' );
 
-    memset( allocatorNameBuffer, '\0', sizeof( allocatorNameBuffer ) );
-    memcpy( allocatorNameBuffer, allocatorName.data(), allocatorName.size() );
+    // Make sure that each rank is looking at the same allocator.
+    MpiWrapper::allReduce( allocatorNameFixedSize.c_str(), &allocatorNameMinChars.front(), MAX_NAME_LENGTH, MPI_MIN, MPI_COMM_GEOSX );
+    if( allocatorNameFixedSize != allocatorNameMinChars )
+    {
+      GEOS_WARNING( "Not all ranks have an allocator named " << allocatorNameFixedSize << ", cannot compute high water mark." );
+      continue;
+    }
 
-    memset( allocatorNameMinCharsBuffer, '\0', sizeof( allocatorNameMinCharsBuffer ) );
+    umpire::Allocator allocator = rm.getAllocator( allocatorName );
+    umpire::strategy::AllocationStrategy const * allocationStrategy = allocator.getAllocationStrategy();
+    umpire::MemoryResourceTraits const traits = allocationStrategy->getTraits();
+    umpire::MemoryResourceTraits::resource_type resourceType = traits.resource;
+    MemoryInfos const memInfos( resourceType );
 
-    // Make sure that each rank is looking at the same allocator.
-    MpiWrapper::allReduce( allocatorNameBuffer, allocatorNameMinCharsBuffer, MAX_NAME_LENGTH, MPI_MIN, MPI_COMM_GEOSX );
-    if( strcmp( allocatorNameBuffer, allocatorNameMinCharsBuffer ) != 0 )
+    if( !memInfos.isPhysicalMemoryHandled() )
     {
-      GEOS_WARNING( "Not all ranks have an allocator named " << allocatorNameBuffer << ", cannot compute high water mark." );
       continue;
     }
 
     // Get the total number of bytes allocated with this allocator across ranks.
     // This is a little redundant since
-    std::size_t const mark = rm.getAllocator( allocatorName ).getHighWatermark();
-    std::size_t const totalMark = MpiWrapper::sum( mark );
+    std::size_t const mark = allocator.getHighWatermark();
+    std::size_t const minMark = MpiWrapper::min( mark );
     std::size_t const maxMark = MpiWrapper::max( mark );
-    GEOS_LOG_RANK_0( "Umpire " << std::setw( 15 ) << allocatorName << " sum across ranks: " <<
-                     std::setw( 9 ) << LvArray::system::calculateSize( totalMark ) );
-    GEOS_LOG_RANK_0( "Umpire " << std::setw( 15 ) << allocatorName << " rank max: " <<
-                     std::setw( 9 ) << LvArray::system::calculateSize( maxMark ) );
+    std::size_t const sumMark = MpiWrapper::sum( mark );
+
+    string percentage; // this rank's mark over the resource total; NOTE(review): reused for every column below
+    if( memInfos.getTotalMemory() == 0 )
+    {
+      percentage = "(n/a)";
+      GEOS_WARNING( "umpire memory percentage could not be resolved" );
+    }
+    else
+    {
+      percentage = GEOS_FMT( "({:.1f}%)", ( 100.0f * (float)mark ) / (float)memInfos.getTotalMemory() );
+    }
+
+    string const minMarkValue = GEOS_FMT( "{} {:>8}",
+                                          LvArray::system::calculateSize( minMark ), percentage );
+    string const maxMarkValue = GEOS_FMT( "{} {:>8}",
+                                          LvArray::system::calculateSize( maxMark ), percentage );
+    string const avgMarkValue = GEOS_FMT( "{} {:>8}",
+                                          LvArray::system::calculateSize( sumMark / nbRank ), percentage );
+    string const sumMarkValue = GEOS_FMT( "{} {:>8}",
+                                          LvArray::system::calculateSize( sumMark ), percentage );
+
+    tableData.addRow( allocatorName,
+                      minMarkValue,
+                      maxMarkValue,
+                      avgMarkValue,
+                      sumMarkValue );
 
     pushStatsIntoAdiak( allocatorName + " sum across ranks", mark );
     pushStatsIntoAdiak( allocatorName + " rank max", mark );
   }
+
+  TableLayout const memoryStatLayout ( {"Umpire Memory Pool\n(reserved / % over total)",
+                                        "Min over ranks",
+                                        "Max over ranks",
+                                        "Avg over ranks",
+                                        "Sum over ranks" } );
+  TableTextFormatter const memoryStatLog( memoryStatLayout );
+
+  GEOS_LOG_RANK_0( memoryStatLog.toString( tableData ));
 }
 
 
@@ -324,5 +374,4 @@ void cleanupEnvironment()
   finalizeMPI();
 }
 
-
 } // namespace geos