Skip to content

Commit

Permalink
Merge pull request #99 from Adrian-Diaz/Adrian's-Branch
Browse files Browse the repository at this point in the history
Tpetra distributed multivector vector wrappers + skeleton for future distributed vectors
  • Loading branch information
Adrian-Diaz authored Oct 2, 2024
2 parents b9a399a + d3fa311 commit d7ecb7f
Show file tree
Hide file tree
Showing 9 changed files with 2,060 additions and 21 deletions.
80 changes: 75 additions & 5 deletions examples/main_kokkos.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**********************************************************************************************
© 2020. Triad National Security, LLC. All rights reserved.
2020. Triad National Security, LLC. All rights reserved.
This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos
National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S.
Department of Energy/National Nuclear Security Administration. All rights in the program are
Expand Down Expand Up @@ -866,19 +866,20 @@ int main(int argc, char* argv[])
});

// Hierarchical


printf("\n\n\nHierarchical\n");
size_t hiersize = 4;
auto hierTest1D = CArrayKokkos<double>(hiersize);
auto hierTest2D = CArrayKokkos<double>(hiersize, hiersize);
auto hierTest3D = CArrayKokkos<double>(hiersize, hiersize, hiersize);
FOR_ALL(i_i, 0, hiersize, j_j, 0, hiersize, k_k, 0, hiersize, {
hierTest3D(i_i, j_j, k_k) = 0.0;
});
FOR_FIRST(hiersize, {
FOR_FIRST(i_i,hiersize, {
// Kokkos::parallel_for( \
//Kokkos::TeamPolicy<>( 32, Kokkos::AUTO, 32 ), \
//KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type &teamMember ) {
const int i_i = TEAM_ID;
//const int i_i = TEAM_ID;
FOR_SECOND(j_j, i_i, hiersize, {
// Kokkos::parallel_for( \
//Kokkos::TeamThreadRange( teamMember, istart, iend ), [&] ( const int (j_j) ) {
Expand All @@ -892,13 +893,82 @@ int main(int argc, char* argv[])
});
});
Kokkos::fence();
printf("\n\n\nHierarchical\n");
for (int ppp = 0; ppp < hiersize; ppp++) {
// printf("%f\n", hierTest3D(0,0,ppp));
// printf("%f\n", hierTest2D(3,ppp));
// printf("%f\n", hierTest3D(3,3,ppp));
}
//printf("\n\n");

// Hierarchical reductions

FOR_ALL(i_i, 0, hiersize, j_j, 0, hiersize, k_k, 0, hiersize, {
hierTest3D(i_i, j_j, k_k) = i_i*hiersize*hiersize+j_j*hiersize+k_k;
});

printf("\n\n\nHierarchical Reduce\n");
//2D nesting
FOR_FIRST(i_i,hiersize, {
// Kokkos::parallel_for( \
//Kokkos::TeamPolicy<>( 32, Kokkos::AUTO, 32 ), \
//KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type &teamMember ) {
//const int i_i = TEAM_ID;
double result = 0;
double lsum;
FOR_REDUCE_SUM_SECOND(j_j, i_i, hiersize, lsum, {
lsum += hierTest3D(i_i,j_j,0);
// Kokkos::parallel_for( \
//Kokkos::TeamThreadRange( teamMember, istart, iend ), [&] ( const int (j_j) ) {
// hierTest2D(i_i,j_j) = i_i * (j_j+1);
// int jstart = j_j*32;
// int jend = (j_j+1)*32;
}, result);
hierTest1D(i_i)= result;
//printf("value at %d is %f\n", i_i, hierTest1D(i_i));
});
Kokkos::fence();
for (int ppp = 0; ppp < hiersize; ppp++) {
//printf("%f\n", hierTest1D(ppp));
// printf("%f\n", hierTest2D(3,ppp));
// printf("%f\n", hierTest3D(3,3,ppp));
}
printf("\n\n");

printf("\n\n\nHierarchical Vectorized Reduce\n");
//3D vector nesting
FOR_FIRST(i_i,hiersize, {
// Kokkos::parallel_for( \
//Kokkos::TeamPolicy<>( 32, Kokkos::AUTO, 32 ), \
//KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type &teamMember ) {
//const int i_i = TEAM_ID;
double result = 0;
double lsum;
FOR_SECOND(j_j, i_i, hiersize, {
// Kokkos::parallel_for( \
//Kokkos::TeamThreadRange( teamMember, istart, iend ), [&] ( const int (j_j) ) {
// hierTest2D(i_i,j_j) = i_i * (j_j+1);
// int jstart = j_j*32;
// int jend = (j_j+1)*32;
FOR_REDUCE_SUM_THIRD(k_k, i_i, j_j, lsum, {
lsum += hierTest3D(i_i,j_j,k_k);
// Kokkos::parallel_for( \
//Kokkos::TeamThreadRange( teamMember, istart, iend ), [&] ( const int (j_j) ) {
// hierTest2D(i_i,j_j) = i_i * (j_j+1);
// int jstart = j_j*32;
// int jend = (j_j+1)*32;
}, result);
hierTest2D(i_i,j_j)= result;
//printf("value at %d , %d is %f\n", i_i, j_j, hierTest2D(i_i,j_j));
});
});
Kokkos::fence();
for (int ppp = 0; ppp < hiersize; ppp++) {
//printf("%f\n", hierTest1D(ppp));
// printf("%f\n", hierTest2D(3,ppp));
// printf("%f\n", hierTest3D(3,3,ppp));
}
printf("\n\n");

} // end of kokkos scope

Kokkos::finalize();
Expand Down
135 changes: 135 additions & 0 deletions src/include/communication_plan.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
#ifndef COMMUNICATION_PLAN_H
#define COMMUNICATION_PLAN_H
/**********************************************************************************************
© 2020. Triad National Security, LLC. All rights reserved.
This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos
National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S.
Department of Energy/National Nuclear Security Administration. All rights in the program are
reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear
Security Administration. The Government is granted for itself and others acting on its behalf a
nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare
derivative works, distribute copies to the public, perform publicly and display publicly, and
to permit others to do so.
This program is open source under the BSD-3 License.
Redistribution and use in source and binary forms, with or without modification, are permitted
provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of
conditions and the following disclaimer in the documentation and/or other materials
provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors may be used
to endorse or promote products derived from this software without specific prior
written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************************/

#include "host_types.h"
#include "kokkos_types.h"
#include <typeinfo>
#ifdef HAVE_MPI
#include <mpi.h>
#include "partition_map.h"

namespace mtr
{

/////////////////////////
/* CommunicationPlan: Class storing relevant data and functions to perform comms between two different MATAR MPI types.
   The object for this class should not be reconstructed if the same comm plan is needed repeatedly; the setup is expensive.
   The comms routines such as execute_comms can be called repeatedly to avoid repeated setup of the plan.

   Template parameters:
     T            - element type of the data being communicated
     Layout       - Kokkos layout forwarded to the DualView alias below
     ExecSpace    - Kokkos execution space forwarded to the DualView alias below
     MemoryTraits - Kokkos memory traits forwarded to the DualView alias below */
/////////////////////////
template <typename T, typename Layout = DefaultLayout, typename ExecSpace = DefaultExecSpace, typename MemoryTraits = void>
class CommunicationPlan {

    // this is managed
    using TArray1D = Kokkos::DualView <T*, Layout, ExecSpace, MemoryTraits>;

protected:

public:

    /*forward comms means communicating data to a vector that doesn't have a unique distribution of its global
      indices amongst processes from a vector that does have a unique distribution amongst processes.
      An example of forward comms in a finite element application would be communicating ghost data from
      the vector of local data.
      reverse comms means communicating data to a vector that has a unique distribution of its global
      indices amongst processes from a vector that does not have a unique distribution amongst processes.
      An example of reverse comms in a finite element application would be communicating force contributions from ghost
      indices via summation to the entries of the uniquely owned vector that stores final tallies of forces.
    */
    // In-class initializer guarantees the documented default for every constructor
    // that does not set the flag explicitly.
    bool reverse_comms_flag = false; //default is false

    // Default constructor: builds a forward-comms plan (reverse_comms_flag == false)
    CommunicationPlan();

    // Copy constructor: copies the comms direction flag directly from temp
    CommunicationPlan(const CommunicationPlan<T, Layout, ExecSpace,MemoryTraits> &temp)
        : reverse_comms_flag(temp.reverse_comms_flag) {}

    // Constructor selecting the comms direction; pass true for reverse comms
    CommunicationPlan(bool reverse_comms);

    // Copy assignment
    KOKKOS_INLINE_FUNCTION
    CommunicationPlan& operator=(const CommunicationPlan& temp);

    // Destructor (virtual so derived plans are destroyed correctly through a base pointer)
    virtual KOKKOS_INLINE_FUNCTION
    ~CommunicationPlan ();

    // Performs the communication described by this plan.
    // The base class implementation does nothing; it is virtual so derived plans can override it.
    virtual void execute_comms(){}
}; // End of CommunicationPlan


// Default constructor
// Explicitly selects forward comms so the flag is never left indeterminate;
// the class documents false as the default value of reverse_comms_flag.
template <typename T, typename Layout, typename ExecSpace, typename MemoryTraits>
CommunicationPlan<T,Layout,ExecSpace,MemoryTraits>::CommunicationPlan()
    : reverse_comms_flag(false) {

}

// Constructor taking the comms direction:
// stores reverse_comms in reverse_comms_flag.
template <typename T, typename Layout, typename ExecSpace, typename MemoryTraits>
CommunicationPlan<T,Layout,ExecSpace,MemoryTraits>::CommunicationPlan(bool reverse_comms)
    : reverse_comms_flag(reverse_comms)
{
}


// Copy assignment: copies the comms direction flag from temp and returns *this.
template <typename T, typename Layout, typename ExecSpace, typename MemoryTraits>
KOKKOS_INLINE_FUNCTION
CommunicationPlan<T,Layout,ExecSpace,MemoryTraits>& CommunicationPlan<T,Layout,ExecSpace,MemoryTraits>::operator= (const CommunicationPlan& temp) {

    // Do nothing if the assignment is of the form x = x
    if (this != &temp) {
        // Take the value from the right-hand side; assigning the member to
        // itself would silently leave this object unchanged.
        reverse_comms_flag = temp.reverse_comms_flag;
    }

    return *this;
}

// Destructor: the body is empty, no explicit cleanup is performed here.
template <typename T, typename Layout, typename ExecSpace, typename MemoryTraits>
KOKKOS_INLINE_FUNCTION
CommunicationPlan<T,Layout,ExecSpace,MemoryTraits>::~CommunicationPlan()
{
}

////////////////////////////////////////////////////////////////////////////////
// End of CommunicationPlan
////////////////////////////////////////////////////////////////////////////////

} // end namespace

#endif // end if have MPI

#endif // COMMUNICATION_PLAN_H

26 changes: 16 additions & 10 deletions src/include/kokkos_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -8040,11 +8040,14 @@ void DynamicRaggedRightArrayKokkos<T,Layout,ExecSpace,MemoryTraits>::set_values(
template <typename T, typename Layout, typename ExecSpace, typename MemoryTraits>
KOKKOS_INLINE_FUNCTION
void DynamicRaggedRightArrayKokkos<T,Layout,ExecSpace,MemoryTraits>::set_values_sparse(T val) {
Kokkos::parallel_for( Kokkos::TeamPolicy<>( dim1_, Kokkos::AUTO, 32 ), KOKKOS_CLASS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type &teamMember ) {
const int i_i = teamMember.league_rank();
Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember, 0, stride_(i_i) ), [&] ( const int (j_j) ) {
array_(dim2_*i_i+j_j) = val;
});
// Kokkos::parallel_for( Kokkos::TeamPolicy<>( dim1_, Kokkos::AUTO, 32 ), KOKKOS_CLASS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type &teamMember ) {
// const int i_i = teamMember.league_rank();
// Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember, 0, stride_(i_i) ), [&] ( const int (j_j) ) {
// array_(dim2_*i_i+j_j) = val;
// });
// });
Kokkos::parallel_for("SetValues_DynamicRaggedRightArrayKokkos", length_, KOKKOS_CLASS_LAMBDA(const int i) {
array_(i) = val;
});
}
// Get the name of the view
Expand Down Expand Up @@ -8256,11 +8259,14 @@ void DynamicRaggedDownArrayKokkos<T,Layout,ExecSpace,MemoryTraits>::set_values(T
template <typename T, typename Layout, typename ExecSpace, typename MemoryTraits>
KOKKOS_INLINE_FUNCTION
void DynamicRaggedDownArrayKokkos<T,Layout,ExecSpace,MemoryTraits>::set_values_sparse(T val) {
Kokkos::parallel_for( Kokkos::TeamPolicy<>( dim2_, Kokkos::AUTO, 32 ), KOKKOS_CLASS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type &teamMember ) {
const int j_j = teamMember.league_rank();
Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember, 0, stride_(j_j) ), [&] ( const int (i_i) ) {
array_(dim1_*j_j+i_i) = val;
});
// Kokkos::parallel_for( Kokkos::TeamPolicy<>( dim2_, Kokkos::AUTO, 32 ), KOKKOS_CLASS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type &teamMember ) {
// const int j_j = teamMember.league_rank();
// Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember, 0, stride_(j_j) ), [&] ( const int (i_i) ) {
// array_(dim1_*j_j+i_i) = val;
// });
// });
Kokkos::parallel_for("SetValues_DynamicRaggedDownArrayKokkos", length_, KOKKOS_CLASS_LAMBDA(const int i) {
array_(i) = val;
});
}
// Get the name of the view
Expand Down
16 changes: 14 additions & 2 deletions src/include/macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -445,24 +445,36 @@ THREAD_ID \
teamMember.team_rank()

#define \
FOR_FIRST(x1, fcn) \
FOR_FIRST(i, x1, fcn) \
Kokkos::parallel_for( \
Kokkos::TeamPolicy<>( x1, Kokkos::AUTO, 32 ), \
KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type &teamMember ) \
{fcn} )
{ const int i = TEAM_ID; fcn} )

// FOR_SECOND(idx, lo, hi, body)
// Expands to a Kokkos::parallel_for over Kokkos::TeamThreadRange(teamMember, lo, hi).
// `body` runs inside a by-reference lambda whose loop index is `const int idx`.
// A variable named `teamMember` must be in scope at the expansion site.
#define FOR_SECOND(idx, lo, hi, body) \
Kokkos::parallel_for( \
Kokkos::TeamThreadRange( teamMember, lo, hi ), \
[&] ( const int (idx) ) {body} )

// FOR_REDUCE_SUM_SECOND(idx, lo, hi, acc, body, out)
// Expands to a Kokkos::parallel_reduce over Kokkos::TeamThreadRange(teamMember, lo, hi).
// `body` runs inside a by-reference lambda taking the loop index `const int idx` and an
// accumulator reference named `acc`; the reduced value is delivered through `out`.
// The accumulator type is taken from decltype(acc), so a variable with that name must
// already be declared at the expansion site (the lambda parameter then shadows it).
// A variable named `teamMember` must also be in scope.
#define FOR_REDUCE_SUM_SECOND(idx, lo, hi, acc, body, out) \
Kokkos::parallel_reduce( \
Kokkos::TeamThreadRange( teamMember, lo, hi ), \
[&] ( const int (idx), decltype(acc) &(acc) ) {body}, \
out )

// FOR_THIRD(idx, lo, hi, body)
// Expands to a Kokkos::parallel_for over Kokkos::ThreadVectorRange(teamMember, lo, hi).
// `body` runs inside a by-reference lambda whose loop index is `const int idx`.
// A variable named `teamMember` must be in scope at the expansion site.
#define FOR_THIRD(idx, lo, hi, body) \
Kokkos::parallel_for( \
Kokkos::ThreadVectorRange( teamMember, lo, hi ), \
[&] ( const int (idx) ) {body} )

// FOR_REDUCE_SUM_THIRD(idx, lo, hi, acc, body, out)
// Expands to a Kokkos::parallel_reduce over Kokkos::ThreadVectorRange(teamMember, lo, hi).
// `body` runs inside a by-reference lambda taking the loop index `const int idx` and an
// accumulator reference named `acc`; the reduced value is delivered through `out`.
// The accumulator type is taken from decltype(acc), so a variable with that name must
// already be declared at the expansion site (the lambda parameter then shadows it).
// A variable named `teamMember` must also be in scope.
#define FOR_REDUCE_SUM_THIRD(idx, lo, hi, acc, body, out) \
Kokkos::parallel_reduce( \
Kokkos::ThreadVectorRange( teamMember, lo, hi ), \
[&] ( const int (idx), decltype(acc) &(acc) ) {body}, \
out )

//Kokkos Initialize
#define \
MATAR_KOKKOS_INIT \
Expand Down
Loading

0 comments on commit d7ecb7f

Please sign in to comment.