Skip to content

Commit

Permalink
Umpire logs refactor - table output + adding percentages (#3052)
Browse files Browse the repository at this point in the history
  • Loading branch information
arng40 authored Jul 2, 2024
1 parent f0a4c31 commit c74702a
Show file tree
Hide file tree
Showing 4 changed files with 214 additions and 21 deletions.
4 changes: 4 additions & 0 deletions src/coreComponents/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ set( common_headers
Format.hpp
GEOS_RAJA_Interface.hpp
GeosxMacros.hpp
MemoryInfos.hpp
Logger.hpp
MpiWrapper.hpp
Path.hpp
Expand Down Expand Up @@ -40,6 +41,7 @@ endif( )
set( common_sources
BufferAllocator.cpp
DataTypes.cpp
MemoryInfos.cpp
Logger.cpp
MpiWrapper.cpp
Path.cpp
Expand Down Expand Up @@ -93,6 +95,8 @@ if( ENABLE_CALIPER )
endif()
endif()

set( dependencyList ${dependencyList} fileIO codingUtilities )

This comment has been minimized.

Copy link
@rrsettgast

rrsettgast Jul 15, 2024

Member

@arng40 @MelReyCG
I didn't catch it at the time, but this is a pretty substantial circular dependency with both of these components. I think common shouldn't have a dependency on anything inside of geos.

This comment has been minimized.

Copy link
@MelReyCG

MelReyCG Jul 16, 2024

Contributor

@rrsettgast
I totally agree. I was thinking of reorganizing this because it clearly appears that fileIO is now both at the top and at the bottom of the GEOS business components.
I propose these key points to reorganize a bit:

  • Section and Table go in a common/format folder,
  • common/logger will go in its own folder (it is going to contain more files).
    image

This comment has been minimized.

Copy link
@rrsettgast

rrsettgast Jul 16, 2024

Member

@MelReyCG I think that makes sense. Let's talk on Thursday about this.


blt_add_library( NAME common
SOURCES ${common_sources}
HEADERS ${common_headers}
Expand Down
68 changes: 68 additions & 0 deletions src/coreComponents/common/MemoryInfos.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* ------------------------------------------------------------------------------------------------------------
* SPDX-License-Identifier: LGPL-2.1-only
*
* Copyright (c) 2018-2020 Lawrence Livermore National Security LLC
* Copyright (c) 2018-2020 The Board of Trustees of the Leland Stanford Junior University
* Copyright (c) 2018-2020 TotalEnergies
* Copyright (c) 2019- GEOSX Contributors
* All rights reserved
*
* See top level LICENSE, COPYRIGHT, CONTRIBUTORS, NOTICE, and ACKNOWLEDGEMENTS files for details.
* ------------------------------------------------------------------------------------------------------------
*/

#include "MemoryInfos.hpp"

namespace geos
{
/**
 * @brief Query the total and available physical memory behind an Umpire resource type.
 * @param resourceType The Umpire memory resource type to inspect.
 *
 * Host and pinned resources are measured with sysconf(); device, constant-device and unified
 * memory resources are measured with cudaMemGetInfo() when GEOS_USE_CUDA is defined.
 * When no getter exists for the resource type on this platform, a warning is emitted, both
 * sizes stay at 0 and isPhysicalMemoryHandled() returns false.
 * When a getter exists but the query fails at run time, a warning is emitted and both sizes
 * stay at 0; the flag is left untouched because it reports whether a getter exists, not whether
 * the query succeeded. Callers can detect that failure through a total size of 0.
 */
MemoryInfos::MemoryInfos( umpire::MemoryResourceTraits::resource_type resourceType ):
  m_totalMemory( 0 ),
  m_availableMemory( 0 ),
  m_physicalMemoryHandled( true )
{
  switch( resourceType )
  {
    case umpire::MemoryResourceTraits::resource_type::host:
    case umpire::MemoryResourceTraits::resource_type::pinned:
// _SC_AVPHYS_PAGES is checked as well: some platforms provide _SC_PHYS_PAGES without it.
#if defined( _SC_PHYS_PAGES ) && defined( _SC_AVPHYS_PAGES ) && defined( _SC_PAGESIZE )
    {
      // sysconf() returns -1 on failure; multiplying such a value would produce a bogus size.
      long const pageSize = sysconf( _SC_PAGESIZE );
      long const totalPages = sysconf( _SC_PHYS_PAGES );
      long const availablePages = sysconf( _SC_AVPHYS_PAGES );
      if( pageSize > 0 && totalPages >= 0 && availablePages >= 0 )
      {
        // Convert before multiplying so the product is computed in the unsigned result type.
        m_totalMemory = static_cast< size_t >( totalPages ) * static_cast< size_t >( pageSize );
        m_availableMemory = static_cast< size_t >( availablePages ) * static_cast< size_t >( pageSize );
      }
      else
      {
        GEOS_WARNING( "Unable to query the host physical memory size with sysconf()." );
      }
    }
#else
      GEOS_WARNING( "Unknown device physical memory size getter for this compiler." );
      m_physicalMemoryHandled = false;
#endif
      break;
    case umpire::MemoryResourceTraits::resource_type::device:
    case umpire::MemoryResourceTraits::resource_type::device_const:
    case umpire::MemoryResourceTraits::resource_type::um:
#if defined( GEOS_USE_CUDA )
    {
      // Query into locals so that the members keep their 0 value if the call fails.
      size_t availableBytes = 0;
      size_t totalBytes = 0;
      cudaError_t const status = cudaMemGetInfo( &availableBytes, &totalBytes );
      if( status == cudaSuccess )
      {
        m_availableMemory = availableBytes;
        m_totalMemory = totalBytes;
      }
      else
      {
        GEOS_WARNING( "Unable to query the device memory size with cudaMemGetInfo(): " << cudaGetErrorString( status ) );
      }
    }
#else
      GEOS_WARNING( "Unknown device physical memory size getter for this compiler." );
      m_physicalMemoryHandled = false;
#endif
      break;
    default:
      GEOS_WARNING( "Physical memory lookup not implemented" );
      m_physicalMemoryHandled = false;
      break;
  }
}

size_t MemoryInfos::getTotalMemory() const
{
return m_totalMemory;
}

size_t MemoryInfos::getAvailableMemory() const
{
return m_availableMemory;
}

bool MemoryInfos::isPhysicalMemoryHandled() const
{
return m_physicalMemoryHandled;
}

}
72 changes: 72 additions & 0 deletions src/coreComponents/common/MemoryInfos.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* ------------------------------------------------------------------------------------------------------------
* SPDX-License-Identifier: LGPL-2.1-only
*
* Copyright (c) 2018-2020 Lawrence Livermore National Security LLC
* Copyright (c) 2018-2020 The Board of Trustees of the Leland Stanford Junior University
* Copyright (c) 2018-2020 TotalEnergies
* Copyright (c) 2019- GEOSX Contributors
* All rights reserved
*
* See top level LICENSE, COPYRIGHT, CONTRIBUTORS, NOTICE, and ACKNOWLEDGEMENTS files for details.
* ------------------------------------------------------------------------------------------------------------
*/

#ifndef GEOS_COMMON_MemoryInfos_HPP_
#define GEOS_COMMON_MemoryInfos_HPP_

#include "umpire/util/MemoryResourceTraits.hpp"
#include "common/Logger.hpp"
#include <unistd.h>
#include <iostream>
#if defined( GEOS_USE_CUDA )
#include <cuda.h>
#include <cuda_runtime.h>
#endif

namespace geos
{

/**
 * @class MemoryInfos
 * @brief Holds the total and available memory sizes looked up for one Umpire memory resource type.
 */
class MemoryInfos
{
public:

  /**
   * @brief Look up the memory sizes for the given resource type and store them.
   * @param resourceType The Umpire memory resource type to query.
   */
  MemoryInfos( umpire::MemoryResourceTraits::resource_type resourceType );

  /**
   * @brief Accessor for the total memory of the queried resource type.
   * @return The total memory, in bytes.
   */
  size_t getTotalMemory() const;

  /**
   * @brief Accessor for the memory currently available on the queried resource type.
   * @return The available memory, in bytes.
   */
  size_t getAvailableMemory() const;

  /**
   * @brief Tell whether the physical memory lookup is handled for the queried resource type.
   * @return True if the lookup is handled, false otherwise.
   */
  bool isPhysicalMemoryHandled() const;

private:

  /// Total memory of the resource, in bytes.
  size_t m_totalMemory;

  /// Available memory of the resource, in bytes.
  size_t m_availableMemory;

  /// True when the physical memory lookup is handled for the resource type.
  bool m_physicalMemoryHandled;
};

}

#endif
91 changes: 70 additions & 21 deletions src/coreComponents/common/initializeEnvironment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,18 @@
#include "TimingMacros.hpp"
#include "Path.hpp"
#include "LvArray/src/system.hpp"

#include "fileIO/Table/TableLayout.hpp"
#include "fileIO/Table/TableData.hpp"
#include "fileIO/Table/TableFormatter.hpp"
#include "common/LifoStorageCommon.hpp"
#include "common/MemoryInfos.hpp"
#include <umpire/TypedAllocator.hpp>
// TPL includes
#include <umpire/ResourceManager.hpp>
#include <umpire/Allocator.hpp>
#include <umpire/strategy/AllocationStrategy.hpp>
#include "umpire/util/MemoryResourceTraits.hpp"
#include "umpire/util/Platform.hpp"

#if defined( GEOSX_USE_CALIPER )
#include <caliper/cali-manager.h>
Expand All @@ -46,13 +55,11 @@
#if defined( GEOS_USE_HIP )
#include <hip/hip_runtime.h>
#endif

#include <cfenv>

namespace geos
{


///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void setupLogger()
{
Expand Down Expand Up @@ -248,7 +255,9 @@ void finalizeCaliper()
static void addUmpireHighWaterMarks()
{
umpire::ResourceManager & rm = umpire::ResourceManager::getInstance();

integer size;
MPI_Comm_size( MPI_COMM_WORLD, &size );
size_t nbRank = (std::size_t)size;
// Get a list of all the allocators and sort it so that it's in the same order on each rank.
std::vector< string > allocatorNames = rm.getAllocatorNames();
std::sort( allocatorNames.begin(), allocatorNames.end() );
Expand All @@ -264,43 +273,84 @@ static void addUmpireHighWaterMarks()
}

// Loop over the allocators.
constexpr int MAX_NAME_LENGTH = 100;
char allocatorNameBuffer[ MAX_NAME_LENGTH + 1 ];
char allocatorNameMinCharsBuffer[ MAX_NAME_LENGTH + 1 ];
unsigned MAX_NAME_LENGTH = 100;

TableData tableData;
for( string const & allocatorName : allocatorNames )
{
// Skip umpire internal allocators.
if( allocatorName.rfind( "__umpire_internal", 0 ) == 0 )
continue;

GEOS_ERROR_IF_GT( allocatorName.size(), MAX_NAME_LENGTH );
string allocatorNameFixedSize = allocatorName;
allocatorNameFixedSize.resize( MAX_NAME_LENGTH, '\0' );
string allocatorNameMinChars = string( MAX_NAME_LENGTH, '\0' );

memset( allocatorNameBuffer, '\0', sizeof( allocatorNameBuffer ) );
memcpy( allocatorNameBuffer, allocatorName.data(), allocatorName.size() );
// Make sure that each rank is looking at the same allocator.
MpiWrapper::allReduce( allocatorNameFixedSize.c_str(), &allocatorNameMinChars.front(), MAX_NAME_LENGTH, MPI_MIN, MPI_COMM_GEOSX );
if( allocatorNameFixedSize != allocatorNameMinChars )
{
GEOS_WARNING( "Not all ranks have an allocator named " << allocatorNameFixedSize << ", cannot compute high water mark." );
continue;
}

memset( allocatorNameMinCharsBuffer, '\0', sizeof( allocatorNameMinCharsBuffer ) );
umpire::Allocator allocator = rm.getAllocator( allocatorName );
umpire::strategy::AllocationStrategy const * allocationStrategy = allocator.getAllocationStrategy();
umpire::MemoryResourceTraits const traits = allocationStrategy->getTraits();
umpire::MemoryResourceTraits::resource_type resourceType = traits.resource;
MemoryInfos const memInfos( resourceType );

// Make sure that each rank is looking at the same allocator.
MpiWrapper::allReduce( allocatorNameBuffer, allocatorNameMinCharsBuffer, MAX_NAME_LENGTH, MPI_MIN, MPI_COMM_GEOSX );
if( strcmp( allocatorNameBuffer, allocatorNameMinCharsBuffer ) != 0 )
if( !memInfos.isPhysicalMemoryHandled() )
{
GEOS_WARNING( "Not all ranks have an allocator named " << allocatorNameBuffer << ", cannot compute high water mark." );
continue;
}

// Get the total number of bytes allocated with this allocator across ranks.
// This is a little redundant since
std::size_t const mark = rm.getAllocator( allocatorName ).getHighWatermark();
std::size_t const totalMark = MpiWrapper::sum( mark );
std::size_t const mark = allocator.getHighWatermark();
std::size_t const minMark = MpiWrapper::min( mark );
std::size_t const maxMark = MpiWrapper::max( mark );
GEOS_LOG_RANK_0( "Umpire " << std::setw( 15 ) << allocatorName << " sum across ranks: " <<
std::setw( 9 ) << LvArray::system::calculateSize( totalMark ) );
GEOS_LOG_RANK_0( "Umpire " << std::setw( 15 ) << allocatorName << " rank max: " <<
std::setw( 9 ) << LvArray::system::calculateSize( maxMark ) );
std::size_t const sumMark = MpiWrapper::sum( mark );

string percentage;
if( memInfos.getTotalMemory() == 0 )
{
percentage = 0.0;
GEOS_WARNING( "umpire memory percentage could not be resolved" );
}
else
{
percentage = GEOS_FMT( "({:.1f}%)", ( 100.0f * (float)mark ) / (float)memInfos.getTotalMemory() );
}

string const minMarkValue = GEOS_FMT( "{} {:>8}",
LvArray::system::calculateSize( minMark ), percentage );
string const maxMarkValue = GEOS_FMT( "{} {:>8}",
LvArray::system::calculateSize( maxMark ), percentage );
string const avgMarkValue = GEOS_FMT( "{} {:>8}",
LvArray::system::calculateSize( sumMark / nbRank ), percentage );
string const sumMarkValue = GEOS_FMT( "{} {:>8}",
LvArray::system::calculateSize( sumMark ), percentage );

tableData.addRow( allocatorName,
minMarkValue,
maxMarkValue,
avgMarkValue,
sumMarkValue );

pushStatsIntoAdiak( allocatorName + " sum across ranks", mark );
pushStatsIntoAdiak( allocatorName + " rank max", mark );
}

TableLayout const memoryStatLayout ( {"Umpire Memory Pool\n(reserved / % over total)",
"Min over ranks",
"Max over ranks",
"Avg over ranks",
"Sum over ranks" } );
TableTextFormatter const memoryStatLog( memoryStatLayout );

GEOS_LOG_RANK_0( memoryStatLog.toString( tableData ));
}


Expand All @@ -324,5 +374,4 @@ void cleanupEnvironment()
finalizeMPI();
}


} // namespace geos

0 comments on commit c74702a

Please sign in to comment.