Skip to content

Commit

Permalink
Fix compilation issues and PVT Driver unit test on Pangea3 (#2910)
Browse files Browse the repository at this point in the history
* removed access to private class members from device code

* modified host config for P3

* removed CUDA Hypre flag (does not completely work on P3)

* Re-enabled cuda acceleration for hypre

---------

Co-authored-by: Matteo Cusini <[email protected]>
  • Loading branch information
2 people authored and ouassimkh committed Feb 16, 2024
1 parent 946cbc6 commit fb5f688
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 11 deletions.
10 changes: 4 additions & 6 deletions host-configs/TOTAL/pangea3-gcc8.4.1-openmpi-4.1.2.cmake
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# hostconfig for pangea3
#
# export MODULEPATH=/workrd/SCR/NUM/geosx_num/module_files:$MODULEPATH
# module load cmake/3.21.4 gcc/8.4.1 cuda/11.0.3 ompi/4.1.2 openblas/0.3.18 python4geosx/p3/gcc8.4.1-ompi4.1.2
#
set(CONFIG_NAME "pangea3-gcc8.4.1-ompi-4.1.2" CACHE PATH "")

Expand Down Expand Up @@ -56,15 +54,15 @@ if (DEFINED ENV{CUDA_ROOT})
set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc CACHE STRING "")
set(CUDA_ARCH sm_70 CACHE STRING "")
set(CMAKE_CUDA_ARCHITECTURES 70 CACHE STRING "")
set(CMAKE_CUDA_FLAGS "-restrict -arch ${CUDA_ARCH} --expt-extended-lambda -Werror cross-execution-space-call,reorder,deprecated-declarations" CACHE STRING "")
set(CMAKE_CUDA_FLAGS "-restrict -arch ${CUDA_ARCH} --expt-relaxed-constexpr --expt-extended-lambda -Werror cross-execution-space-call,reorder,deprecated-declarations" CACHE STRING "")
set(CMAKE_CUDA_FLAGS_RELEASE "-O3 -DNDEBUG -Xcompiler -DNDEBUG -Xcompiler -O3 -Xcompiler -mcpu=powerpc64le -Xcompiler -mtune=powerpc64le" CACHE STRING "")
set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo ${CMAKE_CUDA_FLAGS_RELEASE}" CACHE STRING "")
set(CMAKE_CUDA_FLAGS_DEBUG "-g -G -O0 -Xcompiler -O0" CACHE STRING "")

# Uncomment this line to make nvcc output register usage for each kernel.
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --resource-usage" CACHE STRING "" FORCE)
else()
message(FATAL_ERROR "You must have CUDA_ROOT environment variable set, we advise loading module cuda/11.0.3")
message(FATAL_ERROR "You must have CUDA_ROOT environment variable set, we advise loading module cuda/11.5.0")
endif()

# GTEST options
Expand Down Expand Up @@ -108,7 +106,7 @@ set(ENABLE_PETSC OFF CACHE BOOL "")
# Build with hypre enabled.
set(ENABLE_HYPRE ON CACHE BOOL "")
# Select CUDA as the hypre device backend. The value is a backend name rather
# than an on/off switch, so the cache entry type is STRING (a BOOL entry would
# be presented as a checkbox in cmake-gui/ccmake).
set(ENABLE_HYPRE_DEVICE "CUDA" CACHE STRING "")

# activate workaround for fmt formatter
set(ENABLE_FMT_CONST_FORMATTER_WORKAROUND ON CACHE BOOL "")
# Disable benchmarks: they are incompatible with the nvcc version on Pangea3 (CUDA 11.5.0).
set(ENABLE_BENCHMARKS OFF CACHE BOOL "")

include( ${CMAKE_CURRENT_LIST_DIR}/../tpls.cmake )
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,16 @@ void PVTDriver::runTest( FLUID_TYPE & fluid, arrayView2d< real64 > const & table
// note: column indexing should be kept consistent with output file header below.

integer const numSteps = m_numSteps;
integer const outputCompressibility = m_outputCompressibility;
integer const outputPhaseComposition = m_outputPhaseComposition;
using ExecPolicy = typename FLUID_TYPE::exec_policy;
forAll< ExecPolicy >( composition.size( 0 ),
[this, numPhases, numComponents, numSteps, kernelWrapper,
table, composition]
[ outputCompressibility, outputPhaseComposition, numPhases, numComponents, numSteps, kernelWrapper,
table, composition]
GEOS_HOST_DEVICE ( localIndex const i )
{
// Index for start of phase properties
integer const PHASE = m_outputCompressibility != 0 ? TEMP + 3 : TEMP + 2;
integer const PHASE = outputCompressibility != 0 ? TEMP + 3 : TEMP + 2;

// Temporary space for phase mole fractions
stackArray1d< real64, constitutive::MultiFluidBase::MAX_NUM_COMPONENTS > phaseComposition( numComponents );
Expand All @@ -81,7 +83,7 @@ void PVTDriver::runTest( FLUID_TYPE & fluid, arrayView2d< real64 > const & table
kernelWrapper.update( i, 0, table( n, PRES ), table( n, TEMP ), composition[i] );
table( n, TEMP + 1 ) = kernelWrapper.totalDensity()( i, 0 );

if( m_outputCompressibility != 0 )
if( outputCompressibility != 0 )
{
table( n, TEMP + 2 ) = kernelWrapper.totalCompressibility( i, 0 );
}
Expand All @@ -92,7 +94,7 @@ void PVTDriver::runTest( FLUID_TYPE & fluid, arrayView2d< real64 > const & table
table( n, PHASE + p + numPhases ) = kernelWrapper.phaseDensity()( i, 0, p );
table( n, PHASE + p + 2 * numPhases ) = kernelWrapper.phaseViscosity()( i, 0, p );
}
if( m_outputPhaseComposition != 0 )
if( outputPhaseComposition != 0 )
{
for( integer p = 0; p < numPhases; ++p )
{
Expand Down

0 comments on commit fb5f688

Please sign in to comment.