Skip to content

Commit

Permalink
Merge Pull Request trilinos#11432 from vqd8a/Trilinos/adelus-add-old-…
Browse files Browse the repository at this point in the history
…factorsolve-interfaces

Automatically Merged using Trilinos Pull Request AutoTester
PR Title: Adelus: Add original factorsolve interfaces back for calls within EIGER
PR Author: vqd8a
  • Loading branch information
trilinos-autotester authored Jan 6, 2023
2 parents a5a8539 + 340dded commit 1118703
Showing 1 changed file with 346 additions and 1 deletion.
347 changes: 346 additions & 1 deletion packages/adelus/src/Adelus.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@

namespace Adelus {

/// Adelus GetDistribution
/// Gives the distribution information that is required by the dense solver

/// \param comm (In) - communicator that Adelus runs on
Expand Down Expand Up @@ -106,6 +106,49 @@ namespace Adelus {
return(0);

}

/// Adelus GetDistribution (old interface)
/// Gives the distribution information that is required by the dense solver.
/// This pointer-based overload always queries the distribution on MPI_COMM_WORLD
/// and forwards every argument, dereferenced, to distmat_.
///
/// \param nprocs_row_ (In) - number of processors for a row
/// \param number_of_unknowns (In) - order of the dense matrix
/// \param nrhs_ (In) - number of right hand sides
/// \param my_rows_ (Out) - number of rows of the matrix on this processor
/// \param my_cols_ (Out) - number of columns of the matrix on this processor
/// \param my_first_row_ (Out) - first (global) row number on this processor (array starts at index 1)
/// \param my_first_col_ (Out) - first (global) column number on this processor (array starts at index 1)
/// \param my_rhs_ (Out) - number of right hand sides on this processor
/// \param my_row (Out) - row number in processor mesh, 0 to the number of processors for a column -1
/// \param my_col (Out) - column number in processor mesh, 0 to the number of processors for a row -1
/// \return always 0
inline
int GetDistribution( int* nprocs_row_,
                     int* number_of_unknowns,
                     int* nrhs_,
                     int* my_rows_,
                     int* my_cols_,
                     int* my_first_row_,
                     int* my_first_col_,
                     int* my_rhs_,
                     int* my_row,
                     int* my_col ) {
  // Give every pointee a readable name; each reference aliases the caller's
  // storage, so anything distmat_ writes lands directly in the caller's variables.
  int& procs_per_row  = *nprocs_row_;
  int& unknowns       = *number_of_unknowns;
  int& total_rhs      = *nrhs_;
  int& local_rows     = *my_rows_;
  int& local_cols     = *my_cols_;
  int& first_row      = *my_first_row_;
  int& first_col      = *my_first_col_;
  int& local_rhs      = *my_rhs_;
  int& mesh_row       = *my_row;
  int& mesh_col       = *my_col;

  // Echo the multiprocessor distribution of the matrix.
  distmat_( MPI_COMM_WORLD,
            procs_per_row, unknowns, total_rhs,
            local_rows, local_cols,
            first_row, first_col,
            local_rhs,
            mesh_row, mesh_col );

  return 0;
}

/// Adelus FactorSolve
/// Factors and solves the dense matrix
Expand All @@ -129,6 +172,44 @@ namespace Adelus {

}

/// Adelus FactorSolve (old interface)
/// Factors and solves the dense matrix

/// \param AA (InOut) -- Kokkos View that has the matrix and rhs packed (Note: matrix and rhs are overwritten)
/// \param my_rows_ (In) -- number of rows of the matrix on this processor
/// \param my_cols_ (In) -- number of columns of the matrix on this processor
/// \param matrix_size (In) -- order of the dense matrix
/// \param num_procsr (In) -- number of processors for a row
/// \param num_rhs (In) -- number of right hand sides
/// \param secs (Out) -- factor and solve time in seconds

template<class ZDView>
inline
void FactorSolve( ZDView AA,
int my_rows_,
int my_cols_,
int* matrix_size,
int* num_procsr,
int* num_rhs,
double* secs ) {
int rank;

MPI_Comm_rank(MPI_COMM_WORLD, &rank);

#ifdef PRINT_STATUS
printf("FactorSolve (Kokkos View interface) in rank %d -- my_rows %u , my_cols %u , matrix_size %u, num_procs_per_row %u, num_rhs %u\n", rank, my_rows_, my_cols_, *matrix_size, *num_procsr, *num_rhs);
#endif

using value_type = typename ZDView::value_type;
using execution_space = typename ZDView::device_type::execution_space;
using memory_space = typename ZDView::device_type::memory_space;

Adelus::AdelusHandle<value_type, execution_space, memory_space>
ahandle(0, MPI_COMM_WORLD, *matrix_size, *num_procsr, *num_rhs );
lusolve_(ahandle, AA, secs);

}

/// Adelus Factor
/// Factors the dense matrix for later solve

Expand Down Expand Up @@ -265,6 +346,72 @@ namespace Adelus {
#endif

lusolve_(ahandle, AA_i, secs);
#endif
}
}

/// Adelus FactorSolve_hostPtr (old interface)
/// Matrix and rhs are packed and passed as host pointer (double complex data).
/// The solve runs on MPI_COMM_WORLD. With CUDA or HIP enabled the packed data is
/// copied to a device View, solved there, and copied back into AA; otherwise the
/// solve operates directly on the host memory behind AA.
///
/// \param AA (InOut) -- host pointer to the packed matrix and rhs, wrapped as a LayoutLeft
///                      View of Kokkos::complex<double> with my_rows_ rows and
///                      my_cols_ + my_rhs_ + 6 columns
/// \param my_rows_ (In) -- number of rows of the matrix on this processor
/// \param my_cols_ (In) -- number of columns of the matrix on this processor
/// \param my_rhs_ (In) -- number of right hand sides on this processor
/// \param matrix_size (In) -- order of the dense matrix
/// \param num_procsr (In) -- number of processors for a row
/// \param num_rhs (In) -- number of right hand sides
/// \param secs (Out) -- factor and solve time in seconds
inline
void FactorSolve_hostPtr( ADELUS_DATA_TYPE* AA,
                          int my_rows_,
                          int my_cols_,
                          int my_rhs_,
                          int* matrix_size,
                          int* num_procsr,
                          int* num_rhs,
                          double* secs ) {
  int mpi_rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);

  { // All Views live inside this scope: the unmanaged View must be gone before the
    // caller frees AA, otherwise repeated calls may hit a segmentation fault.
    using scalar_type = Kokkos::complex<double>;
    using AA_Internal = Kokkos::View<scalar_type**,
                                     Kokkos::LayoutLeft,
                                     Kokkos::HostSpace,
                                     Kokkos::MemoryTraits<Kokkos::Unmanaged> >;

    // NOTE(review): the 6 extra columns are inherited from the packed layout of the
    // old interface; their purpose is not visible in this function -- confirm.
    const int packed_cols = my_cols_ + my_rhs_ + 6;

    AA_Internal AA_i(reinterpret_cast<scalar_type *>(AA), my_rows_, packed_cols);

#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
#ifdef KOKKOS_ENABLE_CUDA
    using device_memory = Kokkos::CudaSpace;
#else
    using device_memory = Kokkos::Experimental::HIPSpace;
#endif
    using AA_Internal_dev = Kokkos::View<scalar_type**, Kokkos::LayoutLeft, device_memory>;

    AA_Internal_dev AA_i_dev( "AA_i_dev", my_rows_, packed_cols );

#ifdef PRINT_STATUS
    printf("FactorSolve_hostPtr with CUDA solve (double complex pointer interface) in rank %d -- my_rows %u , my_cols %u, my_rhs %u , matrix_size %u, num_procs_per_row %u, num_rhs %u\n", mpi_rank, my_rows_, my_cols_, my_rhs_, *matrix_size, *num_procsr, *num_rhs);
#endif

    // Host -> device before the solve.
    Kokkos::deep_copy( AA_i_dev, AA_i );

    using handle_type = Adelus::AdelusHandle<AA_Internal_dev::value_type,
                                             AA_Internal_dev::device_type::execution_space,
                                             AA_Internal_dev::device_type::memory_space>;

    handle_type ahandle(0, MPI_COMM_WORLD, *matrix_size, *num_procsr, *num_rhs );
    lusolve_( ahandle, AA_i_dev, secs );

    // Device -> host so the caller sees the result through AA.
    Kokkos::deep_copy( AA_i, AA_i_dev );
#else // host-only build (e.g. OpenMP)
#ifdef PRINT_STATUS
    printf("FactorSolve_hostPtr with host solve (double complex pointer interface) in rank %d -- my_rows %u , my_cols %u, my_rhs %u , matrix_size %u, num_procs_per_row %u, num_rhs %u\n", mpi_rank, my_rows_, my_cols_, my_rhs_, *matrix_size, *num_procsr, *num_rhs);
#endif

    using handle_type = Adelus::AdelusHandle<AA_Internal::value_type,
                                             AA_Internal::device_type::execution_space,
                                             AA_Internal::device_type::memory_space>;

    handle_type ahandle(0, MPI_COMM_WORLD, *matrix_size, *num_procsr, *num_rhs );
    lusolve_( ahandle, AA_i, secs );
#endif
  }
}
Expand Down Expand Up @@ -357,6 +504,72 @@ namespace Adelus {
#endif

lusolve_(ahandle, AA_i, secs);
#endif
}
}

/// Adelus FactorSolve_hostPtr (old interface)
/// Matrix and rhs are packed and passed as host pointer (double data).
/// The solve runs on MPI_COMM_WORLD. With CUDA or HIP enabled the packed data is
/// copied to a device View, solved there, and copied back into AA; otherwise the
/// solve operates directly on the host memory behind AA.
///
/// \param AA (InOut) -- host pointer to the packed matrix and rhs, wrapped as a LayoutLeft
///                      View of double with my_rows_ rows and
///                      my_cols_ + my_rhs_ + 6 columns
/// \param my_rows_ (In) -- number of rows of the matrix on this processor
/// \param my_cols_ (In) -- number of columns of the matrix on this processor
/// \param my_rhs_ (In) -- number of right hand sides on this processor
/// \param matrix_size (In) -- order of the dense matrix
/// \param num_procsr (In) -- number of processors for a row
/// \param num_rhs (In) -- number of right hand sides
/// \param secs (Out) -- factor and solve time in seconds
inline
void FactorSolve_hostPtr( ADELUS_DATA_TYPE* AA,
                          int my_rows_,
                          int my_cols_,
                          int my_rhs_,
                          int* matrix_size,
                          int* num_procsr,
                          int* num_rhs,
                          double* secs ) {
  int mpi_rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);

  { // All Views live inside this scope: the unmanaged View must be gone before the
    // caller frees AA, otherwise repeated calls may hit a segmentation fault.
    using scalar_type = double;
    using AA_Internal = Kokkos::View<scalar_type**,
                                     Kokkos::LayoutLeft,
                                     Kokkos::HostSpace,
                                     Kokkos::MemoryTraits<Kokkos::Unmanaged> >;

    // NOTE(review): the 6 extra columns are inherited from the packed layout of the
    // old interface; their purpose is not visible in this function -- confirm.
    const int packed_cols = my_cols_ + my_rhs_ + 6;

    AA_Internal AA_i(reinterpret_cast<scalar_type *>(AA), my_rows_, packed_cols);

#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
#ifdef KOKKOS_ENABLE_CUDA
    using device_memory = Kokkos::CudaSpace;
#else
    using device_memory = Kokkos::Experimental::HIPSpace;
#endif
    using AA_Internal_dev = Kokkos::View<scalar_type**, Kokkos::LayoutLeft, device_memory>;

    AA_Internal_dev AA_i_dev( "AA_i_dev", my_rows_, packed_cols );

#ifdef PRINT_STATUS
    printf("FactorSolve_hostPtr with CUDA solve (double pointer interface) in rank %d -- my_rows %u , my_cols %u, my_rhs %u , matrix_size %u, num_procs_per_row %u, num_rhs %u\n", mpi_rank, my_rows_, my_cols_, my_rhs_, *matrix_size, *num_procsr, *num_rhs);
#endif

    // Host -> device before the solve.
    Kokkos::deep_copy( AA_i_dev, AA_i );

    using handle_type = Adelus::AdelusHandle<AA_Internal_dev::value_type,
                                             AA_Internal_dev::device_type::execution_space,
                                             AA_Internal_dev::device_type::memory_space>;

    handle_type ahandle(0, MPI_COMM_WORLD, *matrix_size, *num_procsr, *num_rhs );
    lusolve_( ahandle, AA_i_dev, secs );

    // Device -> host so the caller sees the result through AA.
    Kokkos::deep_copy( AA_i, AA_i_dev );
#else // host-only build (e.g. OpenMP)
#ifdef PRINT_STATUS
    printf("FactorSolve_hostPtr with host solve (double pointer interface) in rank %d -- my_rows %u , my_cols %u, my_rhs %u , matrix_size %u, num_procs_per_row %u, num_rhs %u\n", mpi_rank, my_rows_, my_cols_, my_rhs_, *matrix_size, *num_procsr, *num_rhs);
#endif

    using handle_type = Adelus::AdelusHandle<AA_Internal::value_type,
                                             AA_Internal::device_type::execution_space,
                                             AA_Internal::device_type::memory_space>;

    handle_type ahandle(0, MPI_COMM_WORLD, *matrix_size, *num_procsr, *num_rhs );
    lusolve_( ahandle, AA_i, secs );
#endif
  }
}
Expand Down Expand Up @@ -449,6 +662,72 @@ namespace Adelus {
#endif

lusolve_(ahandle, AA_i, secs);
#endif
}
}

/// Adelus FactorSolve_hostPtr (old interface)
/// Matrix and rhs are packed and passed as host pointer (float complex data).
/// The solve runs on MPI_COMM_WORLD. With CUDA or HIP enabled the packed data is
/// copied to a device View, solved there, and copied back into AA; otherwise the
/// solve operates directly on the host memory behind AA.
///
/// \param AA (InOut) -- host pointer to the packed matrix and rhs, wrapped as a LayoutLeft
///                      View of Kokkos::complex<float> with my_rows_ rows and
///                      my_cols_ + my_rhs_ + 6 columns
/// \param my_rows_ (In) -- number of rows of the matrix on this processor
/// \param my_cols_ (In) -- number of columns of the matrix on this processor
/// \param my_rhs_ (In) -- number of right hand sides on this processor
/// \param matrix_size (In) -- order of the dense matrix
/// \param num_procsr (In) -- number of processors for a row
/// \param num_rhs (In) -- number of right hand sides
/// \param secs (Out) -- factor and solve time in seconds
inline
void FactorSolve_hostPtr( ADELUS_DATA_TYPE* AA,
                          int my_rows_,
                          int my_cols_,
                          int my_rhs_,
                          int* matrix_size,
                          int* num_procsr,
                          int* num_rhs,
                          double* secs ) {
  int mpi_rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);

  { // All Views live inside this scope: the unmanaged View must be gone before the
    // caller frees AA, otherwise repeated calls may hit a segmentation fault.
    using scalar_type = Kokkos::complex<float>;
    using AA_Internal = Kokkos::View<scalar_type**,
                                     Kokkos::LayoutLeft,
                                     Kokkos::HostSpace,
                                     Kokkos::MemoryTraits<Kokkos::Unmanaged> >;

    // NOTE(review): the 6 extra columns are inherited from the packed layout of the
    // old interface; their purpose is not visible in this function -- confirm.
    const int packed_cols = my_cols_ + my_rhs_ + 6;

    AA_Internal AA_i(reinterpret_cast<scalar_type *>(AA), my_rows_, packed_cols);

#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
#ifdef KOKKOS_ENABLE_CUDA
    using device_memory = Kokkos::CudaSpace;
#else
    using device_memory = Kokkos::Experimental::HIPSpace;
#endif
    using AA_Internal_dev = Kokkos::View<scalar_type**, Kokkos::LayoutLeft, device_memory>;

    AA_Internal_dev AA_i_dev( "AA_i_dev", my_rows_, packed_cols );

#ifdef PRINT_STATUS
    printf("FactorSolve_hostPtr with CUDA solve (float complex pointer interface) in rank %d -- my_rows %u , my_cols %u, my_rhs %u , matrix_size %u, num_procs_per_row %u, num_rhs %u\n", mpi_rank, my_rows_, my_cols_, my_rhs_, *matrix_size, *num_procsr, *num_rhs);
#endif

    // Host -> device before the solve.
    Kokkos::deep_copy( AA_i_dev, AA_i );

    using handle_type = Adelus::AdelusHandle<AA_Internal_dev::value_type,
                                             AA_Internal_dev::device_type::execution_space,
                                             AA_Internal_dev::device_type::memory_space>;

    handle_type ahandle(0, MPI_COMM_WORLD, *matrix_size, *num_procsr, *num_rhs );
    lusolve_( ahandle, AA_i_dev, secs );

    // Device -> host so the caller sees the result through AA.
    Kokkos::deep_copy( AA_i, AA_i_dev );
#else // host-only build (e.g. OpenMP)
#ifdef PRINT_STATUS
    printf("FactorSolve_hostPtr with host solve (float complex pointer interface) in rank %d -- my_rows %u , my_cols %u, my_rhs %u , matrix_size %u, num_procs_per_row %u, num_rhs %u\n", mpi_rank, my_rows_, my_cols_, my_rhs_, *matrix_size, *num_procsr, *num_rhs);
#endif

    using handle_type = Adelus::AdelusHandle<AA_Internal::value_type,
                                             AA_Internal::device_type::execution_space,
                                             AA_Internal::device_type::memory_space>;

    handle_type ahandle(0, MPI_COMM_WORLD, *matrix_size, *num_procsr, *num_rhs );
    lusolve_( ahandle, AA_i, secs );
#endif
  }
}
Expand Down Expand Up @@ -541,6 +820,72 @@ namespace Adelus {
#endif

lusolve_(ahandle, AA_i, secs);
#endif
}
}

/// Adelus FactorSolve_hostPtr (old interface)
/// Matrix and rhs are packed and passed as host pointer (float data).
/// The solve runs on MPI_COMM_WORLD. With CUDA or HIP enabled the packed data is
/// copied to a device View, solved there, and copied back into AA; otherwise the
/// solve operates directly on the host memory behind AA.
///
/// \param AA (InOut) -- host pointer to the packed matrix and rhs, wrapped as a LayoutLeft
///                      View of float with my_rows_ rows and
///                      my_cols_ + my_rhs_ + 6 columns
/// \param my_rows_ (In) -- number of rows of the matrix on this processor
/// \param my_cols_ (In) -- number of columns of the matrix on this processor
/// \param my_rhs_ (In) -- number of right hand sides on this processor
/// \param matrix_size (In) -- order of the dense matrix
/// \param num_procsr (In) -- number of processors for a row
/// \param num_rhs (In) -- number of right hand sides
/// \param secs (Out) -- factor and solve time in seconds
inline
void FactorSolve_hostPtr( ADELUS_DATA_TYPE* AA,
                          int my_rows_,
                          int my_cols_,
                          int my_rhs_,
                          int* matrix_size,
                          int* num_procsr,
                          int* num_rhs,
                          double* secs ) {
  int mpi_rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);

  { // All Views live inside this scope: the unmanaged View must be gone before the
    // caller frees AA, otherwise repeated calls may hit a segmentation fault.
    using scalar_type = float;
    using AA_Internal = Kokkos::View<scalar_type**,
                                     Kokkos::LayoutLeft,
                                     Kokkos::HostSpace,
                                     Kokkos::MemoryTraits<Kokkos::Unmanaged> >;

    // NOTE(review): the 6 extra columns are inherited from the packed layout of the
    // old interface; their purpose is not visible in this function -- confirm.
    const int packed_cols = my_cols_ + my_rhs_ + 6;

    AA_Internal AA_i(reinterpret_cast<scalar_type *>(AA), my_rows_, packed_cols);

#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
#ifdef KOKKOS_ENABLE_CUDA
    using device_memory = Kokkos::CudaSpace;
#else
    using device_memory = Kokkos::Experimental::HIPSpace;
#endif
    using AA_Internal_dev = Kokkos::View<scalar_type**, Kokkos::LayoutLeft, device_memory>;

    AA_Internal_dev AA_i_dev( "AA_i_dev", my_rows_, packed_cols );

#ifdef PRINT_STATUS
    printf("FactorSolve_hostPtr with CUDA solve (float pointer interface) in rank %d -- my_rows %u , my_cols %u, my_rhs %u , matrix_size %u, num_procs_per_row %u, num_rhs %u\n", mpi_rank, my_rows_, my_cols_, my_rhs_, *matrix_size, *num_procsr, *num_rhs);
#endif

    // Host -> device before the solve.
    Kokkos::deep_copy( AA_i_dev, AA_i );

    using handle_type = Adelus::AdelusHandle<AA_Internal_dev::value_type,
                                             AA_Internal_dev::device_type::execution_space,
                                             AA_Internal_dev::device_type::memory_space>;

    handle_type ahandle(0, MPI_COMM_WORLD, *matrix_size, *num_procsr, *num_rhs );
    lusolve_( ahandle, AA_i_dev, secs );

    // Device -> host so the caller sees the result through AA.
    Kokkos::deep_copy( AA_i, AA_i_dev );
#else // host-only build (e.g. OpenMP)
#ifdef PRINT_STATUS
    printf("FactorSolve_hostPtr with host solve (float pointer interface) in rank %d -- my_rows %u , my_cols %u, my_rhs %u , matrix_size %u, num_procs_per_row %u, num_rhs %u\n", mpi_rank, my_rows_, my_cols_, my_rhs_, *matrix_size, *num_procsr, *num_rhs);
#endif

    using handle_type = Adelus::AdelusHandle<AA_Internal::value_type,
                                             AA_Internal::device_type::execution_space,
                                             AA_Internal::device_type::memory_space>;

    handle_type ahandle(0, MPI_COMM_WORLD, *matrix_size, *num_procsr, *num_rhs );
    lusolve_( ahandle, AA_i, secs );
#endif
  }
}
Expand Down

0 comments on commit 1118703

Please sign in to comment.