From 7058cb38476b93066f3c18be08630835d84f590b Mon Sep 17 00:00:00 2001 From: Ryan Aronson Date: Wed, 3 Jul 2024 10:54:04 -0700 Subject: [PATCH] Comments/Documentation for New Ghosting Architecture (#3189) * added comments for global numbering (Ryan's version) * comments for constructing owned and present mesh data (Ryan's version) * comments for construction of alocal djacency matrix data by each rank (Ryan's version) * Comments through ghostingFootprint (Ryan's version) * Comments through construction of GhostSend/GhostRecv for each rank (Ryan's version) * complete comments for performGhosting (Ryan's version) * Update comments about sorting following fix * finished comments of doTheNewGhosting (Ryan's version) * make new ghosting active/deactive by setting command line flag (-g 1) --- .../common/initializeEnvironment.hpp | 3 + .../mainInterface/ProblemManager.cpp | 20 +- .../mainInterface/ProblemManager.hpp | 7 +- .../mainInterface/initialization.cpp | 7 + .../mesh/generators/BuildPods.cpp | 37 +- .../mesh/generators/NewGhosting.cpp | 370 ++++++- .../mesh/generators/NewGlobalNumbering.cpp | 986 +++++++++--------- 7 files changed, 902 insertions(+), 528 deletions(-) diff --git a/src/coreComponents/common/initializeEnvironment.hpp b/src/coreComponents/common/initializeEnvironment.hpp index 044b1977294..b91ac24dc80 100644 --- a/src/coreComponents/common/initializeEnvironment.hpp +++ b/src/coreComponents/common/initializeEnvironment.hpp @@ -88,6 +88,9 @@ struct CommandLineOptions /// Print memory usage in data repository real64 printMemoryUsage = -1.0; + + /// Use new ghosting strategy + integer useNewGhosting = 0; }; /** diff --git a/src/coreComponents/mainInterface/ProblemManager.cpp b/src/coreComponents/mainInterface/ProblemManager.cpp index 631ec0efb4c..564249ae166 100644 --- a/src/coreComponents/mainInterface/ProblemManager.cpp +++ b/src/coreComponents/mainInterface/ProblemManager.cpp @@ -142,11 +142,11 @@ ProblemManager::ProblemManager( conduit::Node & root ): setRestartFlags( RestartFlags::WRITE ). setDescription( "Whether to disallow using pinned memory allocations for MPI communication buffers." ); - m_useNewGhosting = 1; -// registerWrapper( viewKeysStruct::useNewGhostingString(), &m_useNewGhosting ). -// setInputFlag( InputFlags::OPTIONAL ). -// setApplyDefaultValue( 0 ). -// setDescription( "Controls the use of the new ghosting implementation." ); + commandLine.registerWrapper< integer >( viewKeys.useNewGhosting.key( ) ). + setApplyDefaultValue( 0 ). + setRestartFlags( RestartFlags::WRITE ). + setDescription( "Whether to use new ghosting strategy" ); + } ProblemManager::~ProblemManager() @@ -191,6 +191,7 @@ void ProblemManager::parseCommandLineInput() commandLine.getReference< integer >( viewKeys.overridePartitionNumbers ) = opts.overridePartitionNumbers; commandLine.getReference< integer >( viewKeys.useNonblockingMPI ) = opts.useNonblockingMPI; commandLine.getReference< integer >( viewKeys.suppressPinned ) = opts.suppressPinned; + commandLine.getReference< integer >( viewKeys.useNewGhosting ) = opts.useNewGhosting; string & outputDirectory = commandLine.getReference< string >( viewKeys.outputDirectory ); outputDirectory = opts.outputDirectory; @@ -633,10 +634,12 @@ void ProblemManager::generateMesh() GEOS_MARK_FUNCTION; DomainPartition & domain = getDomainPartition(); + Group const & commandLine = getGroup< Group >( groupKeys.commandLine ); + MeshManager & meshManager = this->getGroup< MeshManager >( groupKeys.meshManager ); - GEOS_LOG_RANK( "m_useNewGhosting = " << m_useNewGhosting ); - meshManager.generateMeshes( m_useNewGhosting, domain ); + GEOS_LOG_RANK( "useNewGhosting = " << commandLine.getReference< integer >( viewKeys.useNewGhosting ) ); + meshManager.generateMeshes( commandLine.getReference< integer >( viewKeys.useNewGhosting ), domain ); // get all the discretizations from the numerical methods. // map< pair< mesh body name, pointer to discretization>, array of region names > @@ -658,7 +661,7 @@ void ProblemManager::generateMesh() } else { - if( m_useNewGhosting ) + if( commandLine.getReference< integer >( viewKeys.useNewGhosting ) ) { GEOS_LOG_RANK_0( "Generating the mesh levels for the new ghosting." ); generateMeshLevelFreeFct( meshBody.getMeshMappings(), @@ -685,7 +688,6 @@ void ProblemManager::generateMesh() } } ); - Group const & commandLine = this->getGroup< Group >( groupKeys.commandLine ); integer const useNonblockingMPI = commandLine.getReference< integer >( viewKeys.useNonblockingMPI ); // domain.setupBaseLevelMeshGlobalInfo(); diff --git a/src/coreComponents/mainInterface/ProblemManager.hpp b/src/coreComponents/mainInterface/ProblemManager.hpp index ed4635a7d4d..7a201463085 100644 --- a/src/coreComponents/mainInterface/ProblemManager.hpp +++ b/src/coreComponents/mainInterface/ProblemManager.hpp @@ -230,9 +230,7 @@ class ProblemManager : public dataRepository::Group dataRepository::ViewKey outputDirectory = {"outputDirectory"}; ///< Output directory key dataRepository::ViewKey useNonblockingMPI = {"useNonblockingMPI"}; ///< Flag to use non-block MPI key dataRepository::ViewKey suppressPinned = {"suppressPinned"}; ///< Flag to suppress use of pinned memory key - constexpr static char const * useNewGhostingString() - { return "useNewGhosting"; } - + dataRepository::ViewKey useNewGhosting = {"useNewGhosting"}; ///< Flag to use new ghosting strategies } viewKeys; ///< Command line input viewKeys /// Child group viewKeys @@ -385,9 +383,6 @@ class ProblemManager : public dataRepository::Group /// The FieldSpecificationManager FieldSpecificationManager * m_fieldSpecificationManager; - - /// Whether we should use the new ghosting implementation. - integer m_useNewGhosting = 0; }; } /* namespace geos */ diff --git a/src/coreComponents/mainInterface/initialization.cpp b/src/coreComponents/mainInterface/initialization.cpp index 75ea5de3b61..0d28cfaf534 100644 --- a/src/coreComponents/mainInterface/initialization.cpp +++ b/src/coreComponents/mainInterface/initialization.cpp @@ -103,6 +103,7 @@ std::unique_ptr< CommandLineOptions > parseCommandLineOptions( int argc, char * TRACE_DATA_MIGRATION, MEMORY_USAGE, PAUSE_FOR, + NEW_GHOST, }; const option::Descriptor usage[] = @@ -123,6 +124,7 @@ std::unique_ptr< CommandLineOptions > parseCommandLineOptions( int argc, char * { TRACE_DATA_MIGRATION, 0, "", "trace-data-migration", Arg::None, "\t--trace-data-migration, \t Trace host-device data migration" }, { MEMORY_USAGE, 0, "m", "memory-usage", Arg::nonEmpty, "\t-m, --memory-usage, \t Minimum threshold for printing out memory allocations in a member of the data repository." }, { PAUSE_FOR, 0, "", "pause-for", Arg::numeric, "\t--pause-for, \t Pause geosx for a given number of seconds before starting execution" }, + { NEW_GHOST, 0, "g", "new-ghosting", Arg::numeric, "\t-g, --new-ghosting, \t Use new ghosting strategy" }, { 0, 0, nullptr, nullptr, nullptr, nullptr } }; @@ -237,6 +239,11 @@ std::unique_ptr< CommandLineOptions > parseCommandLineOptions( int argc, char * std::this_thread::sleep_for( std::chrono::seconds( duration ) ); } break; + case NEW_GHOST: + { + commandLineOptions->useNewGhosting = 1; + } + break; } } diff --git a/src/coreComponents/mesh/generators/BuildPods.cpp b/src/coreComponents/mesh/generators/BuildPods.cpp index 6a00f09ef24..acb25238665 100644 --- a/src/coreComponents/mesh/generators/BuildPods.cpp +++ b/src/coreComponents/mesh/generators/BuildPods.cpp @@ -55,11 +55,14 @@ std::map< GI, toLocIdx_t< GI > > buildGlobalToLocalMap( std::set< GI > const & g return g2l; } +// Consolidate the mesh graphs on the current rank into one object MeshGraph mergeMeshGraph( MeshGraph const & owned, MeshGraph const & present, MeshGraph const & ghosts ) { + // inititalize the consolidated version with owned MeshGraph result{ owned }; + // insert the present and ghost data for( MeshGraph const & graph: { present, ghosts } ) { result.c2f.insert( std::cbegin( graph.c2f ), std::cend( graph.c2f ) ); @@ -89,6 +92,7 @@ struct UpwardMappings std::map< NodeLocIdx, std::vector< CellLocIdx > > n2c; }; +// converts generic map to arrayOfArrays template< class T, class U > ArrayOfArrays< localIndex > convertToAoA( std::map< T, std::vector< U > > const & t2u ) { @@ -138,6 +142,7 @@ array2d< localIndex, P > convertToA2d( std::map< T, std::vector< U > > const & t return t2u_; } +// converts map to unordered map template< class GI > unordered_map< globalIndex, localIndex > convertGlobalToLocalMap( std::map< GI, toLocIdx_t< GI > > const & g2l ) { @@ -150,6 +155,7 @@ unordered_map< globalIndex, localIndex > convertGlobalToLocalMap( std::map< GI, return g2l_; } +// removes MPIRank typed int, changes from std::vector to array1d template< typename LI > std::map< integer, array1d< localIndex > > toFlavorlessMapping( std::map< MpiRank, std::vector< LI > > const & input ) { @@ -165,7 +171,7 @@ std::map< integer, array1d< localIndex > > toFlavorlessMapping( std::map< MpiRan } return output; -}; +} EdgeMgrImpl makeFlavorlessEdgeMgrImpl( std::size_t const & numEdges, std::map< EdgeLocIdx, std::tuple< NodeLocIdx, NodeLocIdx > > const & e2n, @@ -219,6 +225,8 @@ FaceMgrImpl makeFlavorlessFaceMgrImpl( std::size_t const & numFaces, toFlavorlessMapping( recv ) ); } +// Function to create a nodeManager from our local upward mapping data +// By flavorless, we are indicating that we are changing data-structures to GEOS ones, etc NodeMgrImpl makeFlavorlessNodeMgrImpl( std::size_t const & numNodes, std::map< NodeGlbIdx, std::array< double, 3 > > const & n2pos, std::map< NodeLocIdx, std::vector< EdgeLocIdx > > const & n2e, @@ -282,6 +290,7 @@ DownwardMappings buildDownwardMappings( GlobalToLocal const & g2l, DownwardMappings res; // Building the `e2n` (edges to nodes) mapping + // Simply looping through the meshgraph and converting to local indices for( auto const & [egi, ngis]: graph.e2n ) { NodeLocIdx const nli0 = g2l.nodes.at( std::get< 0 >( ngis ) ); @@ -290,8 +299,10 @@ DownwardMappings buildDownwardMappings( GlobalToLocal const & g2l, } // Building the `f2n` (face to nodes) and `f2e` (faces to edges) mappings + // Loop through meshgraph for( auto const & [fgi, edgeInfos]: graph.f2e ) { + // convert to face local index FaceLocIdx const & fli = g2l.faces.at( fgi ); std::vector< NodeLocIdx > & nodes = res.f2n[fli]; @@ -300,18 +311,23 @@ DownwardMappings buildDownwardMappings( GlobalToLocal const & g2l, nodes.reserve( std::size( edgeInfos ) ); edges.reserve( std::size( edgeInfos ) ); + // Loop over the edges connected to the face for( EdgeInfo const & edgeInfo: edgeInfos ) { + // use the meshgraph again to go from edge to node, convert to local indices to fill face2node std::tuple< NodeGlbIdx, NodeGlbIdx > const & ngis = graph.e2n.at( edgeInfo.index ); nodes.emplace_back( edgeInfo.start == 0 ? g2l.nodes.at( std::get< 0 >( ngis ) ) : g2l.nodes.at( std::get< 1 >( ngis ) ) ); + // just convert edge global id to local to fill face2edge edges.emplace_back( g2l.edges.at( edgeInfo.index ) ); } } // Building the `c2n` (cell to nodes), `c2e` (cell to edges) and `c2f` (cell to faces) mappings + // loop thorugh meshgraph cells2face for( auto const & [cgi, faceInfos]: graph.c2f ) { + // convert to cell local index CellLocIdx const & cli = g2l.cells.at( cgi ); std::vector< FaceLocIdx > & faces = res.c2f[cli]; @@ -321,12 +337,14 @@ DownwardMappings buildDownwardMappings( GlobalToLocal const & g2l, edges.reserve( std::size( faceInfos ) ); // c2f + // simple conversion of global to local index is all thats needed for( FaceInfo const & faceInfo: faceInfos ) { faces.emplace_back( g2l.faces.at( faceInfo.index ) ); } // c2e + // loop over the faces in the cell and collect all their edges std::set< EdgeLocIdx > tmpEdges; for( FaceLocIdx const & fli: faces ) { @@ -336,16 +354,19 @@ DownwardMappings buildDownwardMappings( GlobalToLocal const & g2l, edges.assign( std::cbegin( tmpEdges ), std::cend( tmpEdges ) ); // c2n + // Note how we are hard-coded for hexs FaceInfo const & bottomFace = faceInfos.at( 4 ); // (0, 3, 2, 1) // TODO depends on element type. FaceInfo const & topFace = faceInfos.at( 5 ); // (4, 5, 6, 7) std::vector< NodeLocIdx > const & bottomNodes = res.f2n.at( g2l.faces.at( bottomFace.index ) ); std::vector< NodeLocIdx > const & topNodes = res.f2n.at( g2l.faces.at( topFace.index ) ); + // reserFaceNodes resets the order using the flipped and start info std::vector< NodeLocIdx > const bn = resetFaceNodes( bottomNodes, bottomFace.isFlipped, bottomFace.start ); std::vector< NodeLocIdx > const tn = resetFaceNodes( topNodes, topFace.isFlipped, topFace.start ); // TODO carefully check the ordering... + // looks like this is geos order std::array< NodeLocIdx, 8 > const tmp{ bn[0], bn[3], bn[2], bn[1], tn[0], tn[1], tn[2], tn[3] }; res.c2n[cli] = { tmp[0], tmp[1], tmp[3], tmp[2], tmp[4], tmp[5], tmp[7], tmp[6] }; } @@ -494,8 +515,12 @@ void buildPods( MeshGraph const & owned, GhostSend const & send, MeshMappingImpl & meshMappings ) { + // start by merging the 3 mesh graphs for the different types of data on the graph into 1 struct MeshGraph const graph = mergeMeshGraph( owned, present, ghosts ); + // create GlobalToLocal, which maps global IDs to rank local IDs for (nodes, edges, faces, cells) + // mapKeys is just a utility which extracts keys + // buildGlobalToLocalMap just takes the global ids and maps them to 1, 2, 3, ... GlobalToLocal const g2l{ buildGlobalToLocalMap( mapKeys< std::set >( graph.n2pos ) ), buildGlobalToLocalMap( mapKeys< std::set >( graph.e2n ) ), @@ -503,9 +528,19 @@ void buildPods( MeshGraph const & owned, buildGlobalToLocalMap( mapKeys< std::set >( graph.c2f ) ) }; + // Now we build the mappings used by GEOS + // Downward - edges2nodes, faces2edges, faces2nodes, cells2faces, cells2edges, cells2nodes + // The difference is that these use local indices, thus we built g2l + // there are assumptions that we are working on hexs here DownwardMappings const downwardMappings = buildDownwardMappings( g2l, graph ); + // invert to downward mappings to get e2f, f2c, n2e, n2f, n2c UpwardMappings const upwardMappings = buildUpwardMappings( downwardMappings ); + // Now we can make our node/edge/face/cell manager + // NodeMgrImpl inherits from nodeManager (see pods.hpp) (interface with getters and setters) + // We basically have all the data, this function just casts it into the proper GEOS types + // same for edges, faces, cells + meshMappings.setEdgeMgr( makeFlavorlessEdgeMgrImpl( std::size( g2l.edges ), downwardMappings.e2n, upwardMappings.e2f, diff --git a/src/coreComponents/mesh/generators/NewGhosting.cpp b/src/coreComponents/mesh/generators/NewGhosting.cpp index 48b9f635242..953801b6541 100644 --- a/src/coreComponents/mesh/generators/NewGhosting.cpp +++ b/src/coreComponents/mesh/generators/NewGhosting.cpp @@ -88,11 +88,17 @@ MaxGlbIdcs gatherOffset( vtkSmartPointer< vtkDataSet > mesh, return *std::max_element( s.begin(), s.end() ); }; + // MaxGlbIdcs is just a struct of the 4 index types (ints under the hood) + // edge and face max (global) index values are passed in, + // we use a little lambda to get the node and cell max indices MaxGlbIdcs const offsets{ NodeGlbIdx{ extract( mesh->GetPointData()->GetGlobalIds() ) }, maxEdgeId, maxFaceId, CellGlbIdx{ extract( mesh->GetCellData()->GetGlobalIds() ) } }; + // I would have to look in detail about what these MPI lines do, + // But im fairly sure they are just handling type stuff so that you can do communications + // with the MaxGlbIdcs type // Otherwise, use `MPI_Type_create_struct`. static_assert( std::is_same_v< NodeGlbIdx::UnderlyingType, EdgeGlbIdx::UnderlyingType > ); static_assert( std::is_same_v< NodeGlbIdx::UnderlyingType, FaceGlbIdx::UnderlyingType > ); @@ -107,6 +113,8 @@ MaxGlbIdcs gatherOffset( vtkSmartPointer< vtkDataSet > mesh, MPI_Op op; MPI_Op_create( g, true, &op ); + // Do an allReduce so that every rank knows the max index for (nodes, edges, faces, cells) + // g is the custom reduction function which takes max for each (nodes, edges, faces, cells) MaxGlbIdcs result( offsets ); MPI_Allreduce( &offsets, &result, 1, t, op, MPI_COMM_WORLD ); @@ -167,13 +175,25 @@ std::tuple< MeshGraph, MeshGraph > buildMeshGraph( vtkSmartPointer< vtkDataSet > BucketOffsets const & offsets, MpiRank curRank ) { + // MeshGraph (defined in BuildPods.hpp) is just a struct of maps which map: + // cell global index to vector of entries of type (face global index, extra stuff defining orientation of face) for each of the adjacent faces + // face global index to vector of entries of type (edge global index plus orientation) for each of the adjacent edges + // edge global index to tuple of node global indices + // node global index to 3d array of position + // We distinguish between the owned data and the present data + // present data is stuff that is owned by another rank, but is already present on the current rank because of the boundary exchange done previously. + // Note that present just includes the 2D boundary between ranks, we still need the cell info, the other faces, edges, nodes which make up that cell + // (and further if we wanted deeper ghosting) MeshGraph owned, present; + // Any (node, edge, face, cell) is owned by the lowest rank on which it appears + // so we define a lambda to compute if the current rank owns some piece of data auto const isCurrentRankOwning = [&curRank]( std::set< MpiRank > const & ranks ) -> bool { return curRank == *std::min_element( std::cbegin( ranks ), std::cend( ranks ) ); }; + // Get the nodal positions for nodes on current rank mesh and store in appropriate MeshGraph (owned/present) std::map< NodeGlbIdx, vtkIdType > const n2vtk = buildNgiToVtk( mesh ); for( auto const & [ranks, nodes]: buckets.nodes ) { @@ -185,10 +205,13 @@ std::tuple< MeshGraph, MeshGraph > buildMeshGraph( vtkSmartPointer< vtkDataSet > } } + // Loop through edge data and assign to the owned/present graph for( auto const & [ranks, edges]: buckets.edges ) { auto & e2n = isCurrentRankOwning( ranks ) ? owned.e2n : present.e2n; - EdgeGlbIdx egi = offsets.edges.at( ranks ); // TODO hack + // im not sure why this is a 'hack', this seems like the way I would do it + // namely get the edge offset for this bucket, then count upwards in global Id for each edge in bucket + EdgeGlbIdx egi = offsets.edges.at( ranks ); // TODO hack for( Edge const & edge: edges ) { e2n[egi] = edge; @@ -203,33 +226,40 @@ std::tuple< MeshGraph, MeshGraph > buildMeshGraph( vtkSmartPointer< vtkDataSet > e2n.insert( std::cbegin( m ), std::cend( m ) ); } - // Simple inversion + // Simple inversion of the e2n (which didnt care about ownership) to a node2edge (which therefore also doesnt care about ownership) + // this is used in constructing the face2edge data below std::map< std::tuple< NodeGlbIdx, NodeGlbIdx >, EdgeGlbIdx > n2e; for( auto const & [e, n]: e2n ) { n2e[n] = e; // TODO what about ownership? } + // Loop through the face data to assign to owned/present graph for( auto const & [ranks, faces]: buckets.faces ) { auto & f2e = isCurrentRankOwning( ranks ) ? owned.f2e : present.f2e; + // grab initial index for faces in this bucket and then loop over these faces FaceGlbIdx fgi = offsets.faces.at( ranks ); for( Face face: faces ) // Intentional copy for the future `emplace_back`. { - face.emplace_back( face.front() ); // Trick to build the edges. + face.emplace_back( face.front() ); // Trick to build the edges. Namely you need to build the edge that connects the last node to the first for( std::size_t i = 0; i < face.size() - 1; ++i ) { + // An edge is just the pair of node global indices, plus a boolean telling you the order + // This is placed into an EdgeInfo struct (defined in buildPods.hpp, but its just this info) NodeGlbIdx const & n0 = face[i], & n1 = face[i + 1]; std::pair< NodeGlbIdx, NodeGlbIdx > const p0 = std::make_pair( n0, n1 ); std::pair< NodeGlbIdx, NodeGlbIdx > const p1 = std::minmax( n0, n1 ); - EdgeInfo const info = { n2e.at( p1 ), std::uint8_t{ p0 != p1 } }; - f2e[fgi].emplace_back( info ); + EdgeInfo const info = { n2e.at( p1 ), std::uint8_t{ p0 != p1 } }; // n2e is used to get the edge global index from the node global indices + f2e[fgi].emplace_back( info ); // assign data to proper graph } - ++fgi; + ++fgi; // increment the offset for the bucket } } + // create a map from node global indices to face global index which does not care about ownership, similar to n2e above + // this will be used in generating cell2face data, like how n2e was used in generating face2edge std::map< std::vector< NodeGlbIdx >, FaceGlbIdx > n2f; for( auto const & [ranks, faces]: buckets.faces ) { @@ -250,6 +280,7 @@ std::tuple< MeshGraph, MeshGraph > buildMeshGraph( vtkSmartPointer< vtkDataSet > // TODO copy paste? for( auto f = 0; f < cell->GetNumberOfFaces(); ++f ) { + // for each of the faces in the cell, get the global ids of the nodes vtkCell * face = cell->GetFace( f ); vtkIdList * pids = face->GetPointIds(); std::vector< NodeGlbIdx > faceNodes( pids->GetNumberOfIds() ); @@ -259,9 +290,17 @@ std::tuple< MeshGraph, MeshGraph > buildMeshGraph( vtkSmartPointer< vtkDataSet > vtkIdType const ngi = globalPtIds->GetValue( nli ); faceNodes[i] = NodeGlbIdx{ ngi }; } + + // these nodes will be in the vtk ordering, we want a consistent ordering for each face + // reorderFaceNodes (defined in NewGlobalNumbering) does this by starting with the smallest node index, + // and then iterating in the direction of the smaller of its neighbors bool isFlipped; std::uint8_t start; std::vector< NodeGlbIdx > const reorderedFaceNodes = reorderFaceNodes( faceNodes, isFlipped, start ); + + // add the face data to the vector for this cell in owned graph. + // note there will never be any 'present' cells, as the earlier exchange only communicates the 2D boundary btwn ranks + // note we are using the n2f built above to go from the vector of nodes (properly ordered) to the face global ID owned.c2f[gci].emplace_back( FaceInfo{ n2f.at( reorderedFaceNodes ), isFlipped, start } ); } } @@ -322,13 +361,27 @@ TriCrsMatrix multiply( int commSize, Teuchos::RCP< TriMap const > ownedMap = upward.getRowMap(); Teuchos::RCP< TriMap const > mpiMap = indicator.getRangeMap(); - // Upward (n -> e -> f -> c) + // row j of indicator was just ones in columns corresponding to present entities on rank j + // row i of upward was constructed as having nonzeros in the columns of entities on which the entity corresponding to row i directly depended + // Begin graph traversal by matrix matrix multiplying + // Trilinos matrix matrix multiply https://docs.trilinos.org/dev/packages/tpetra/doc/html/namespaceTpetra_1_1MatrixMatrix.html#a4c3132fa56ba6d5b071f053486529111 + + // Upward (n -> e -> f -> c) + + // u0_0 (num_entities x num_rank) = upward (num_entities x num_entities) * indicator^T (num_entities x num_rank) + // Using the `upward` interpretation, multiplying upward by each column of indicator^T would give + // a column vector noting the all the faces in the global mesh which contain the edges owned by that rank, all the global cells which contain the owned faces, etc + // These are the colums of the result `result_u0_0` TriCrsMatrix result_u0_0( ownedMap, commSize ); Tpetra::MatrixMatrix::Multiply( upward, false, indicator, true, result_u0_0, false ); result_u0_0.fillComplete( mpiMap, ownedMap ); Tpetra::MatrixMarket::Writer< TriCrsMatrix >::writeSparseFile( "/tmp/matrices/result-0.mat", result_u0_0 ); + // We repeat this process to make sure we catch all the dependencies, but im not convinced once isnt enough + // In theory, you iterate until the matrix stops changing + // TODO: test this hypothesis + // TODO: you might also be able to just do one multiplication with upward + upward^T to do everything at once, but perhaps decoding becomes an issue TriCrsMatrix result_u0_1( ownedMap, commSize ); Tpetra::MatrixMatrix::Multiply( upward, false, result_u0_0, false, result_u0_1, false ); result_u0_1.fillComplete( mpiMap, ownedMap ); @@ -339,8 +392,14 @@ TriCrsMatrix multiply( int commSize, result_u0_2.fillComplete( mpiMap, ownedMap ); Tpetra::MatrixMarket::Writer< TriCrsMatrix >::writeSparseFile( "/tmp/matrices/result-2.mat", result_u0_2 ); - // Downward (c -> f -> e -> n) + // Now we know, for each rank, all the geometrical quantities in the global mesh which include something from that rank + // Downward (c -> f -> e -> n) + // Note that we still need to do a traversal in the "opposite direction" + // Consider starting with some edge, and applying the process above. + // You will recover the faces containing it, and the cells which contain those + // What you wont get, for example, is any other (faces, edges, nodes) in that cell + // Thus we repeat the process with downward = upward^T TriCrsMatrix result_d0_0( ownedMap, commSize ); Tpetra::MatrixMatrix::Multiply( upward, true, result_u0_2, false, result_d0_0, false ); result_d0_0.fillComplete( mpiMap, ownedMap ); @@ -358,9 +417,16 @@ TriCrsMatrix multiply( int commSize, MpiWrapper::barrier(); + // Now we know, for each rank, the set of all global geometric entities which are included by any geometric entity that includes something from that rank + // (what a mouthful) + // Note that we have NOT made a distinction between present and owned on the rank though + return result_d0_2; } +// Helper class which converts between matrix indices and (node, edge, face, cell) indices. +// The matrix rows/cols represent all the nodes, then all the edges, then all the faces, then all the cells +// so conversion just means adding/subtracting the proper offsets class FindGeometricalType { public: @@ -454,6 +520,14 @@ class FindGeometricalType TriGlbIdx const m_numEntries; }; +// Helper function to encode geometrical information into ints which can be entered into adjacency matrix +// For example, a face is connected to multiple edges, so in the face row, there will be a nonzero entry in the col for each edge +// But if we just put a 1 in each we lose the information like what order the edges were in and what order the nodes were in +// So instead we put in a value such that we can reconstruct this info if needed later +// For encoding the edges within a face, N = 2, basis = NumEdges, array = {start node (0,1), index of edge within face}, +// so that result = start_node*numEdges+index_in_face +// For encoding faces within a cell, N = 3, basis = numFaces, array = { isFlipped, start_index, index of face in cell }, +// so that result = (isFlipped*numFaces + start_index)*numFaces + index_in_cell template< std::uint8_t N > std::size_t encode( std::size_t const & basis, std::array< std::size_t, N > const & array ) @@ -489,6 +563,7 @@ std::array< std::size_t, N > decode( std::size_t const & basis, return result; } +// Structure which describes the adjacency matrix for the mesh graph with the data on current rank struct Adjacency { std::vector< TriGlbIdx > ownedNodesIdcs; @@ -499,16 +574,20 @@ struct Adjacency std::vector< std::vector< TriScalarInt > > values; }; +// Function to populate the adjacency struct defined directly above +// Note that we really build I+A, as in the end we will essentially be doing graph traversal by multiplying by (I+A)^n, ((I+A)^T)^n +// Including I gives you everything in the graph 'up to' n steps away, without it you only get things 'exactly' n steps away Adjacency buildAdjacency( MeshGraph const & owned, MeshGraph const & present, FindGeometricalType const & convert ) { Adjacency adjacency; + // Num matrix indices owned by the current rank, and the num indices corresponding to the 'present' data std::size_t const numOwned = std::size( owned.n2pos ) + std::size( owned.e2n ) + std::size( owned.f2e ) + std::size( owned.c2f ); std::size_t const numOther = std::size( present.n2pos ) + std::size( present.e2n ) + std::size( present.f2e ); - // Aliases + // Aliases for the data in adjacency std::vector< TriGlbIdx > & ownedNodesIdcs = adjacency.ownedNodesIdcs; std::vector< TriGlbIdx > & ownedGlbIdcs = adjacency.ownedGlbIdcs; std::vector< TriGlbIdx > & presentGlbIdcs = adjacency.presentGlbIdcs; @@ -516,6 +595,7 @@ Adjacency buildAdjacency( MeshGraph const & owned, std::vector< std::vector< TriGlbIdx > > & indices = adjacency.indices; std::vector< std::vector< TriScalarInt > > & values = adjacency.values; + // we can go ahead and set sizes for the current rank matrix data ownedNodesIdcs.reserve( std::size( owned.n2pos ) ); ownedGlbIdcs.reserve( numOwned ); presentGlbIdcs.reserve( numOther ); @@ -523,6 +603,9 @@ Adjacency buildAdjacency( MeshGraph const & owned, indices.reserve( numOwned ); values.reserve( numOwned ); + // fill the matrix index data for the 'present' gemetrical entities (nodes, edges, faces) + // Note that here we just note the indices, we dont fill any matrix entry or anything + // The filling of the matrix entries is done by the owning rank for( auto const & [ngi, _]: present.n2pos ) { presentGlbIdcs.emplace_back( convert.fromNodeGlbIdx( ngi ) ); @@ -535,9 +618,11 @@ Adjacency buildAdjacency( MeshGraph const & owned, { presentGlbIdcs.emplace_back( convert.fromFaceGlbIdx( fgi ) ); } - std::sort( std::begin( presentGlbIdcs ), std::end( presentGlbIdcs ) ); - GEOS_ASSERT_EQ( numOther, std::size( presentGlbIdcs ) ); + std::sort( std::begin( presentGlbIdcs ), std::end( presentGlbIdcs ) ); // I think that the data in n2pos, e2n, f2e was not necessarily sorted, should double check this as sorting caused a bug in owned data + GEOS_ASSERT_EQ( numOther, std::size( presentGlbIdcs ) ); // ensure we added the correct amount of stuff + + // Now do owned data for( auto const & [ngi, _]: owned.n2pos ) { // Nodes depend on no other geometrical entity, @@ -557,11 +642,15 @@ Adjacency buildAdjacency( MeshGraph const & owned, // To keep the symmetry with the faces (see below), // - we store the local index (0 or 1) to express this information. // - we store the number of underlying nodes (2) on the diagonal. - size_t constexpr numNodes = std::tuple_size_v< decltype( nodes ) >; + size_t constexpr numNodes = std::tuple_size_v< decltype( nodes ) >; // should always be 2 + // This rank owns this row of the matrix ownedGlbIdcs.emplace_back( convert.fromEdgeGlbIdx( egi ) ); numEntriesPerRow.push_back( numNodes + 1 ); // `+1` comes from the diagonal + + // for the row correxponding to this edge global index, have entries in col corresponding to the + // node global indices it connects, and an entry on the diagonal indices.emplace_back( std::vector< TriGlbIdx >{ convert.fromNodeGlbIdx( std::get< 0 >( nodes ) ), convert.fromNodeGlbIdx( std::get< 1 >( nodes ) ), ownedGlbIdcs.back() } ); @@ -575,24 +664,33 @@ Adjacency buildAdjacency( MeshGraph const & owned, // Faces point to multiple edges, but their edges can be flipped w.r.t. the canonical edges // (ie minimal node global index comes first). // In order not to lose this information (held by an `EdgeInfo` instance), we serialise - // - the `EdgeInfo` instance, + // - the `EdgeInfo` instance, (recall `EdgeInfo` is just struct with edge global ID and start node (0,1), see BuildPods.hpp) // - plus the order in which the edges are appearing, // as an integer and store it as an entry in the matrix. // On the diagonal, we'll store the number of edges of the face. std::size_t const numEdges = std::size( edges ); + // This rank owns this row of the matrix ownedGlbIdcs.emplace_back( convert.fromFaceGlbIdx( fgi ) ); numEntriesPerRow.push_back( numEdges + 1 ); // `+1` comes from the diagonal + + // go ahead and add a vector of the correct size to represent the col indices and values for this row of the matrix (not yet populated) std::vector< TriGlbIdx > & ind = indices.emplace_back( numEntriesPerRow.back() ); std::vector< TriScalarInt > & val = values.emplace_back( numEntriesPerRow.back() ); + + // populate the data for this row of the matrix for( std::size_t i = 0; i < numEdges; ++i ) { + // col index is just obtained from edge global id in EdgeInfo for this edge EdgeInfo const & edgeInfo = edges[i]; ind[i] = convert.fromEdgeGlbIdx( edgeInfo.index ); + // use a single int for each edge col index to represent if the edge is flipped and position of edge in face + // in this case v = 1 + start_node*numEdges + index_in_face (again add 1 so that zero indexing becomes 1 indexing) std::size_t const v = 1 + encode< 2 >( numEdges, { edgeInfo.start, i } ); val[i] = intConv< TriScalarInt >( v ); } + // Add diagonal data ind.back() = ownedGlbIdcs.back(); val.back() = intConv< TriScalarInt >( numEdges ); } @@ -600,22 +698,33 @@ Adjacency buildAdjacency( MeshGraph const & owned, { // The same comment as for faces and edges applies for cells and faces (see above). // The main differences being that - // - the `FaceInfo` as a little more information, - // - the diagonal stores the type of the cell which conveys the number of face, nodes, ordering... + // - the `FaceInfo` as a little more information, + // (recall FaceInfo is a struct which holds the global face index, the starting node index, and boolean telling which direction the nodes should be traversed) + // - the diagonal stores the type of the cell which conveys the number of face, nodes, ordering... (note this is still a TODO, right now its just numFaces) std::size_t const numFaces = std::size( faces ); + // This rank owns this row of the matrix ownedGlbIdcs.emplace_back( convert.fromCellGlbIdx( cgi ) ); numEntriesPerRow.push_back( numFaces + 1 ); // `+1` comes from the diagonal + + // go ahead and add a vector of the correct size to represent the col indices and values for this row of the matrix (not yet populated) std::vector< TriGlbIdx > & ind = indices.emplace_back( numEntriesPerRow.back() ); std::vector< TriScalarInt > & val = values.emplace_back( numEntriesPerRow.back() ); + + // populate the data for this row of the matrix for( std::size_t i = 0; i < numFaces; ++i ) { + // col index is just gotten from the global id in FaceInfo for this face FaceInfo const & faceInfo = faces[i]; ind[i] = convert.fromFaceGlbIdx( faceInfo.index ); + + // Encode data with a single int like above, but now int tells us start_node, direction (flipped), and position of face in cell + // in this case v = 1 + (isFlipped*numFaces + start_index)*numFaces + index_in_cell (again add 1 so that zero indexing becomes 1 indexing) std::size_t const v = 1 + encode< 3 >( numFaces, { faceInfo.isFlipped, faceInfo.start, i } ); val[i] = intConv< TriScalarInt >( v ); } + // add diagonal data ind.back() = ownedGlbIdcs.back(); val.back() = intConv< TriScalarInt >( numFaces ); // TODO This should be Hex and the not the number of faces... } @@ -637,13 +746,20 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & MaxGlbIdcs const & gis, MpiRank curRank ) { + // Trilinos tools package smart reference counting pointer, see + // https://docs.trilinos.org/dev/packages/teuchos/doc/html/classTeuchos_1_1RCP.html#details + // https://docs.trilinos.org/dev/packages/teuchos/doc/html/RefCountPtrBeginnersGuideSAND.pdf using Teuchos::RCP; + // instantiate class which converts between matrix index and (node, edge, face, cell) index + // the max global indices of (node, edge, face, cell) describe where we change geometrical elements in the matrix indices, + // where all the geometrical entities are stacked one after the other FindGeometricalType const convert( gis.nodes, gis.edges, gis.faces, gis.cells ); using Geom = FindGeometricalType::Geom; std::size_t const n = convert.numEntries(); // Total number of entries in the graph. + // Build the adjacency matrix data for this rank (each rank fills in the values for geometry that it 'owns', but notes the geometry that is 'present') Adjacency const adjacency = buildAdjacency( owned, present, convert ); std::size_t const numOwned = std::size( adjacency.ownedGlbIdcs ); std::size_t const numPresent = std::size( adjacency.presentGlbIdcs ); @@ -656,45 +772,67 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & std::vector< std::vector< TriGlbIdx > > const & indices = adjacency.indices; std::vector< std::vector< TriScalarInt > > const & values = adjacency.values; + // Makes a Trilinos tools MPI communicator and return smart pointer to it RCP< TriComm const > const comm = make_rcp< Teuchos::MpiComm< int > const >( MPI_COMM_GEOSX ); + // Makes a const TriMap, passing the arguments to the constructor of TriMap, returns smart pointer to it + // note TriMap = Tpetra::Map< TriLocIdx, TriGlbIdx >, created using this constructor https://docs.trilinos.org/dev/packages/tpetra/doc/html/classTpetra_1_1Map.html#a4a21c6654fc9fb6db05644a52cc0128d + // Ultimately just collecting who owns what parts of the global matrix auto const ownedMap = make_rcp< TriMap const >( Tpetra::global_size_t( n ), ownedGlbIdcs.data(), TriLocIdx( numOwned ), TriGlbIdx{ 0 }, comm ); // The `upward` matrix offers a representation of the graph connections. // The matrix is square. Each size being the number of geometrical entities. + // The matrix is just the assembed, global adjacency. + // Note that the matrix will end up being lower triangular, and banded because edge rows only have nonzeros in the node columns, faces only have nonzeros with edges, etc + // Quick note on why we call it upward: + // Consider a vector where we put a 1 in the row corresponding to some edge with matrix index i. + // Then multiply upward times this vector. The result has nonzeros in the same row I (because we filled the diagonal of the matrix) + // and also any other rows where column i had nonzeros, which is exactly the indices of the faces containing the edge. + // Note TriCrsMatrix = Tpetra::CrsMatrix< TriScalarInt , TriLocIdx, TriGlbIdx >; https://docs.trilinos.org/dev/packages/tpetra/doc/html/classTpetra_1_1CrsMatrix.html TriCrsMatrix upward( ownedMap, numEntriesPerRow() ); + // Fill the global matrix rows that this rank owns + //(note that this is where the sorting things I mentioned above becomes an issue, if you reorder only the ownedGlobalIDs you assign the wrong rows) for( std::size_t i = 0; i < numOwned; ++i ) { std::vector< TriGlbIdx > const & rowIndices = indices[i]; std::vector< TriScalarInt > const & rowValues = values[i]; GEOS_ASSERT_EQ( std::size( rowIndices ), numEntriesPerRow[i] ); GEOS_ASSERT_EQ( std::size( rowValues ), numEntriesPerRow[i] ); + //https://docs.trilinos.org/dev/packages/tpetra/doc/html/classTpetra_1_1CrsMatrix.html#a8d4784081c59f27391ad825c4dae5e86 upward.insertGlobalValues( ownedGlbIdcs[i], TriLocIdx( std::size( rowIndices ) ), rowValues.data(), rowIndices.data() ); } + // Signal that we are done changing the values/structure of the global adjacency matrix + // https://docs.trilinos.org/dev/packages/tpetra/doc/html/classTpetra_1_1CrsMatrix.html#aa985b225a24d2f74602e25b38b4430af upward.fillComplete( ownedMap, ownedMap ); + + // Note this can cause some issues if /tmp doesnt exist, etc (at least it did in my codespace) Tpetra::MatrixMarket::Writer< TriCrsMatrix >::writeSparseFile( "/tmp/matrices/upward.mat", upward ); int const commSize( MpiWrapper::commSize() ); + // Now we are going to build the various other matrices which will be used in conjunction with the adjacency to ghost + // Now let's build the domain indicator matrix. - // It's rectangular, number of rows being the number of MPI ranks, - // number of columns being the number of nodes in the mesh graph, - // ie the number of geometrical entities in the mesh. + // It's rectangular, number of rows being the number of MPI ranks (so each rank builds its row), + // number of columns being the number of nodes in the mesh graph, ie the number of geometrical entities in the mesh. // It contains one for every time the geometrical entity is present on the rank. // By present, we do not meant that the ranks owns it: just that it's already available on the rank. auto const mpiMap = make_rcp< TriMap const >( Tpetra::global_size_t( commSize ), TriGlbIdx{ 0 }, comm ); // Let the current rank get the appropriate index in this map. TriCrsMatrix indicator( mpiMap, numOwned + numPresent ); + // ones is a vector of size numOwned of 1s, a vector of size numOther of 1s, or a vector with a single entry of 1, whichever is biggest + // this is just a little trick to ensure you have a long enough vector or 1 to enter into the matrix std::vector< TriScalarInt > const ones( std::max( { numOwned, numPresent, std::size_t( 1 ) } ), 1 ); + indicator.insertGlobalValues( TriGlbIdx( curRank.get() ), TriLocIdx( numOwned ), ones.data(), ownedGlbIdcs.data() ); indicator.insertGlobalValues( TriGlbIdx( curRank.get() ), TriLocIdx( numPresent ), ones.data(), presentGlbIdcs.data() ); indicator.fillComplete( ownedMap, mpiMap ); // The `ownership` matrix is a diagonal square matrix. - // Each size being the number of geometrical entities. + // Each size being the number of geometrical entities (so same size as adjacency). // The value of the diagonal term will be the owning rank. - // By means of matrices multiplications, it will be possible to exchange the ownerships information across the ranks. + // By means of matrix multiplications, it will be possible to exchange the ownerships information across the ranks. // TODO Could we use an Epetra_Vector as a diagonal matrix? std::vector< TriScalarInt > myRank( 1, curRank.get() ); TriCrsMatrix ownership( ownedMap, 1 ); @@ -705,6 +843,7 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & ownership.fillComplete( ownedMap, ownedMap ); Tpetra::MatrixMarket::Writer< TriCrsMatrix >::writeSparseFile( "/tmp/matrices/ownership.mat", ownership ); + // Log some info on these last 2 matrices created for debugging if( curRank == 0_mpi ) { GEOS_LOG_RANK( "indicator.NumGlobalCols() = " << indicator.getGlobalNumCols() ); @@ -716,24 +855,27 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & // The `ghostingFootprint` matrix is rectangular, // the number of columns being the number of MPI ranks, - // the number of rows being the number of nodes in the mesh graph. + // the number of rows being the number of nodes in the mesh graph, ie the total number of mesh geometric quantities + // (So it is the same size as the transpose of the domain indicator matrix) // - // For any MPI rank (ie column), a non-zero entry means + // For any MPI rank (ie column), a non-zero entry in a row means // that the corresponding geometrical entry has to be eventually present onto the rank // for the ghosting to be effective. // // From `ghostingFootprint` we can extract where the current rank has to send any owned graph node. + // Computed using the custom multiply function defined above, + // which does the equivalent of graph tranversal by repeated multiplications of upward and upward^T TriCrsMatrix ghostingFootprint( multiply( commSize, indicator, upward ) ); ghostingFootprint.resumeFill(); - ghostingFootprint.setAllToScalar( 1. ); + ghostingFootprint.setAllToScalar( 1. ); // We put `1` everywhere there's a non-zero entry, so we'll be able to compose with the `ownership` matrix. ghostingFootprint.fillComplete( mpiMap, ownedMap ); Tpetra::MatrixMarket::Writer< TriCrsMatrix >::writeSparseFile( "/tmp/matrices/ghostingFootprint.mat", ghostingFootprint ); - // We put `1` everywhere there's a non-zero entry, so we'll be able to compose with the `ownership` matrix. // FIXME TODO WARNING From `ghostingFootprint` extract where I have to send // Then, perform the ghostingFootprint * ownership matrix multiplication, // so I get to know from whom I'll receive. + // just some output for sanity checks if( curRank == 0_mpi ) { GEOS_LOG_RANK( "ghosted->NumGlobalCols() = " << ghostingFootprint.getGlobalNumCols() ); @@ -741,21 +883,28 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & } // The `ghostExchange` matrix is rectangular, - // the number of columns being the number of nodes in the mesh graph, + // the number of columns being the number of nodes in the mesh graph, ie the total number of mesh geometric quantities // the number of rows being the number of MPI ranks. + // ghostExchange (num_ranks x num_entities) = ghostingFootprint^T (num_ranks x num_entities) * ownership (num_entities x num_entities, diagonal) + // recall that ghostExchange has all the nonzeros set to 1 + // diagonal matrix just multiplies each col of ghostingFootprint^T (row of ghostingFootprint) by the corresponding diagonal entry + // So each column of ghostExchange will have nonzeros which are all the same value, with the value being the rank owning that entity + // Thus the nonzeros in row i tell us the ranks which own the entities "adjacent" to the entities on rank i // // As the result of the multiplication between the `ghostingFootprint` matrix and the `ownership` matrix, // for each row owned (ie at the current MPI rank index), - // the value of the `ghostExchange` matrix term will provide the actual owning rank for all the . + // the value of the `ghostExchange` matrix term will provide the actual owning rank for all the needed geometric entities. // // From `ghostExchange` we can extract which other rank will send to the current rank any graph node. - TriCrsMatrix ghostExchange( mpiMap, 10000 ); // TODO having `0` there should be working! + TriCrsMatrix ghostExchange( mpiMap, 10000 ); // TODO having `0` there should be working! We need to get a better estimate of number of entries Tpetra::MatrixMatrix::Multiply( ghostingFootprint, true, ownership, false, ghostExchange, false ); ghostExchange.fillComplete( ownedMap, mpiMap ); // TODO Do I have to work with `ghostingFootprint` if I already have `ghostExchange` which may convey more information? + // As we shall see, below we go back to working with ghostingFootprint // TODO Maybe because of the ownership of the ranks: one is also the "scaled" transposed of the other. + // debug output if( curRank == 0_mpi ) { GEOS_LOG_RANK( "ghostExchange->NumGlobalCols() = " << ghostExchange.getGlobalNumCols() ); @@ -763,20 +912,32 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & } Tpetra::MatrixMarket::Writer< TriCrsMatrix >::writeSparseFile( "/tmp/matrices/ghostInfo.mat", ghostExchange ); + // Now, for each of the entities owned by this current rank, we collect all the other ranks to which this entity is sent + + // GhostSend defined in BuildPods.hpp, + // struct of maps for each (node, edge, face, cell) giving the set of ranks each entity (given by node, edge, face, cell global index) must be sent to + GhostSend send; + + // temp data structures used in process std::size_t extracted = 0; Teuchos::Array< TriScalarInt > extractedValues; Teuchos::Array< TriGlbIdx > extractedIndices; - GhostSend send; + // Loop over the owned indiced in global matrix (owned geometric entities) for( TriGlbIdx const & index: ownedGlbIdcs ) { + // get the number of ranks which needed this entity and resize our temp data storage std::size_t const length = ghostingFootprint.getNumEntriesInGlobalRow( index ); - extractedValues.resize( length ); + extractedValues.resize( length ); // note we never actually use the values, just needed the indices where the nonzeros were extractedIndices.resize( length ); + // copy data into temp https://docs.trilinos.org/dev/packages/tpetra/doc/html/classTpetra_1_1RowMatrix.html#a5fd9651c8e5d1cc7dead6cadac342978 ghostingFootprint.getGlobalRowCopy( index, extractedIndices(), extractedValues(), extracted ); GEOS_ASSERT_EQ( extracted, length ); + // neighbors will be the set of mpi ranks which need this entity std::set< MpiRank > neighbors; + + // the ranks are the columns of ghostingFootprint, we just extracted the nonzeros for( std::size_t i = 0; i < extracted; ++i ) { MpiRank const rank( extractedIndices[i] ); @@ -786,11 +947,14 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & } } + // if this entity is not needed by anyone else, great, we're done if( std::empty( neighbors ) ) { continue; } + // if there ranks which need this entity, we convert the global matrix index back to the (node, edge, face cell) global index + // and add to the GhostSend struct switch( convert.getGeometricalType( index ) ) { case Geom::NODE: @@ -818,15 +982,24 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & GEOS_ERROR( "Internal error" ); } } - } + } // end loop over owned entities + + // Now we can look at the dual problem, namely for this current rank, what data is needed from other ranks + // Now we also have to pay attention to what non-owned data was already present on the rank + // Note that we are back to working with ghostExchange, not ghostingFootprint + // recall that the nonzeros in each row of this matrix told us which other ranks' data was needed for the current row rank std::size_t const numNeededIndices = ghostExchange.getNumEntriesInGlobalRow( TriGlbIdx( curRank.get() ) ); + + // copy into same temp vars as above extractedValues.resize( numNeededIndices ); extractedIndices.resize( numNeededIndices ); ghostExchange.getGlobalRowCopy( TriGlbIdx( curRank.get() ), extractedIndices(), extractedValues(), extracted ); GEOS_ASSERT_EQ( extracted, numNeededIndices ); + // create a set containing the indices needed, i.e. the geometric entities belonging to other ranks (we use a set so we can later use set operations) std::set< TriGlbIdx > const allNeededIndices( std::cbegin( extractedIndices ), std::cend( extractedIndices ) ); + std::set< TriGlbIdx > receivedIndices; // The graph nodes that my neighbors will send me. { std::set< TriGlbIdx > const tmp( std::cbegin( ownedGlbIdcs ), std::cend( ownedGlbIdcs ) ); @@ -834,28 +1007,42 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & std::cbegin( tmp ), std::cend( tmp ), std::inserter( receivedIndices, std::end( receivedIndices ) ) ); } + // of course, some of these indices were already communicated, they were the `present` data std::vector< TriGlbIdx > notPresentIndices; // The graphs nodes that are nor owned neither present by/on the current rank. std::set_difference( std::cbegin( receivedIndices ), std::cend( receivedIndices ), std::cbegin( presentGlbIdcs ), std::cend( presentGlbIdcs ), std::back_inserter( notPresentIndices ) ); + + // a bit below we will see that we need to actually send the positions of the ghosted nodes (everything else is just indices so far) + // so here we go through the notPresentIndices and make note of which are nodes (as opposed to edges, faces, cells) std::vector< TriGlbIdx > notPresentNodes; std::copy_if( std::cbegin( notPresentIndices ), std::cend( notPresentIndices ), std::back_inserter( notPresentNodes ), [&]( TriGlbIdx const & i ) { return convert.getGeometricalType( i ) == Geom::NODE; } ); - GEOS_ASSERT_EQ( std::size( allNeededIndices ), numNeededIndices ); + + // Finally we can construct our received entity information + // GhostRecv defined in BuildPods.hpp, + // struct of maps for each (node, edge, face, cell) giving the rank each entity (given by node, edge, face, cell global index) is coming from GhostRecv recv; for( std::size_t i = 0; i < extracted; ++i ) { + // matrix index of required entity TriGlbIdx const & index = extractedIndices[i]; + + // Note this is not notPresentIndices, as information which is defined on geometric entities + // which happen to already be present will still need to be communicated in the future if( receivedIndices.find( index ) == std::cend( receivedIndices ) ) // TODO make a map `receivedIndices -> mpi rank` { continue; } - MpiRank const sender( extractedValues[i] ); + // get the owning rank of the required entity + MpiRank const sender( extractedValues[i] ); // finally making use of the values from ghostExchange + + // convert matrix index to geometry index and populate GhostRecv switch( convert.getGeometricalType( index ) ) { case Geom::NODE: @@ -885,13 +1072,14 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & } } + // debug output // if( curRank == 1_mpi or curRank == 2_mpi ) // { // GEOS_LOG_RANK( "recv.edges = " << json( recv.edges ) ); // GEOS_LOG_RANK( "send.edges = " << json( send.edges ) ); // } - // At this point, each rank knows what it has to send to and what it is going to receive from the other ranks. + // At this point, each rank knows what it has to send and to whom and what it is going to receive from the other ranks. // // The remaining action is about // - retrieving the additional graph information @@ -900,10 +1088,13 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & // knowing the index of the nodes is not enough. // // In order to do that, we build the `missingIndicator` matrix, which is rectangular: - // - The number of columns is the number of graph nodes in the mesh graph. + // - The number of columns is the number of graph nodes in the mesh graph, ie the total number of geometrical entities in the mesh. // - The number of rows is the total number of graph nodes that are missing on ranks. + // For me this was confusing, as its a little different from what we've done before + // Each rank is missing some quantity of mesh entities, and we are going to MpiAllreduce to get the total across all ranks, and this is the number of rows in the global matrix + // Its going to be incredible sparse, in fact only one nonzero per row marking the index of the entity in the graph // Note that the same quantity can be missing on multiple ranks, and that's handled. - // - The non-zero terms equal `1`, meaning that a given quantity (column) is missing on a given rank (row). + // - The non-zero terms equal `1`, meaning that a given quantity (column) is missing on a given rank (row, each rank owns a chunk of rows). // // Combining `missingIndicator` with the adjacency matrix which conveys a lot of connections information, // we'll be able to create the final `missingIndices` matrix, @@ -924,21 +1115,31 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & // that actually are physical nodes in the mesh. // - The number of rows is the total number of graph nodes (that actually are physical nodes in the mesh) that are missing on ranks. // - The non-zero terms equal `1`, meaning that a given quantity (column) is missing on a given rank (row). + + // Group together the number of missing mesh entities and the number of missing mesh nodes in particular for this rank std::size_t const numLocMissingCompound[2] = { std::size( notPresentIndices ), std::size( notPresentNodes ) }; + // Now we can communicate to get the sum over missing entities and missing mesh nodes over all the ranks with simple reduction std::size_t numGlbMissingCompound[2] = { 0, 0 }; MpiWrapper::allReduce( numLocMissingCompound, numGlbMissingCompound, 2, MPI_SUM ); + + // break the above back into individual variables now that parallel reduction is done std::size_t const & numLocMissingIndices = numLocMissingCompound[0]; std::size_t const & numLocMissingNodes = numLocMissingCompound[1]; Tpetra::global_size_t const numGlbMissingIndices = numGlbMissingCompound[0]; Tpetra::global_size_t const numGlbMissingNodes = numGlbMissingCompound[1]; - std::size_t const numGlbNodes = gis.nodes.get() + 1; // TODO Use strongly typed integers + std::size_t const numGlbNodes = gis.nodes.get() + 1; // TODO Use strongly typed integers, note gis was the input variable desribing global index offsets std::size_t const numOwnedNodes = std::size( ownedNodesIdcs ); // TODO Use strongly typed integers - auto const missingIndicesMap = make_rcp< TriMap const >( numGlbMissingIndices, numGlbMissingIndices, TriGlbIdx{ 0 }, comm ); + // Now we are going to make new ownership maps for the new matrices describing missing indices/nodes + // Different constructor, just using sizes + // https://docs.trilinos.org/dev/packages/tpetra/doc/html/classTpetra_1_1Map.html#a7984262c1e6ab71ad3717fd96ed76052 + auto const missingIndicesMap = make_rcp< TriMap const >( numGlbMissingIndices, numGlbMissingIndices, TriGlbIdx{ 0 }, comm ); // TODO: Should second one be local?? auto const missingNodesMap = make_rcp< TriMap const >( numGlbMissingNodes, numLocMissingNodes, TriGlbIdx{ 0 }, comm ); // Following information is needed to compute the global row. + // https://docs.trilinos.org/dev/packages/tpetra/doc/html/classTpetra_1_1Map.html#a0124c1b96f046c824123fb0ff5a9c978 + // getting the index in the global matrix corresponding to local index 0 on current rank TriGlbIdx const missingIndicesOffset = missingIndicesMap->getGlobalElement( TriLocIdx{ 0 } ); // Let trilinos provide the offset. TriGlbIdx const missingNodesOffset = missingNodesMap->getGlobalElement( TriLocIdx{ 0 } ); // Let trilinos provide the offset. @@ -946,18 +1147,30 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & TriCrsMatrix missingIndicator( missingIndicesMap, 1 ); for( std::size_t i = 0; i < numLocMissingIndices; ++i ) { - missingIndicator.insertGlobalValues( missingIndicesOffset + TriGlbIdx( i ), TriLocIdx( 1 ), ones.data(), ¬PresentIndices[i] ); + // every row corresponds to a missing entity on this rank + // simply insert a 1 into the column of the entity given by notPresentIndices + // note each rank owns a chunk of consecutive rows + // https://docs.trilinos.org/dev/packages/tpetra/doc/html/classTpetra_1_1CrsMatrix.html#a8d4784081c59f27391ad825c4dae5e86 + missingIndicator.insertGlobalValues( missingIndicesOffset + TriGlbIdx( i ), TriLocIdx( 1 ), ones.data(), ¬PresentIndices[i] ); // note we are re-using ones from wayy back, just need a pointer to something with 1 } missingIndicator.fillComplete( ownedMap, missingIndicesMap ); - // `missingIndices` will contain the missing connectivity information. - TriCrsMatrix missingIndices( missingIndicesMap, 1000 ); // TODO having `0` there should be working! + // Now build `missingIndices` which will contain the missing connectivity information. Same size as missingIndicator + // missingIndices (num_global_missing_indices x num_entities) = missingIndicator (num_global_missing_indices x num_entities) * upward (num_entities x num_entities) + // every row of missingIndicator just had a 1 in the column which denotes the missing entity, upward is or adjacency matrix + // Similar to how if we do upward * (col indicator vector) we go up the topological chain (1 in edge slot yields faces containing edge) + // when we do (indicator row vector) * upward we get the row of upward corresponding to the indicated column, i.e. the indicated mesh entitiy and everything it depends on + // so if the indicator was on a face, you would get back the edges it is composed of as these are the nonzeros in the result vector + // So the full matrix just has this for every row, where each row is one of the missing entities for one of the ranks + // Complete tangent (sort of) - these pictures always help me with the visualizations of matrix omultiplcations that are very helpful here: https://dzone.com/articles/visualizing-matrix + TriCrsMatrix missingIndices( missingIndicesMap, 1000 ); // TODO having `0` there should be working! (epetra could dynamically resize) This is the same TODO as before, need a way to tell tpetra how many nonzeros Tpetra::MatrixMatrix::Multiply( missingIndicator, false, upward, false, missingIndices, false ); missingIndices.fillComplete( ownedMap, missingIndicesMap ); + // Now we can repeat the process but only consider the nodes, as discussed before // Indicator matrix only for the nodes. // Note that it should be an integer matrix full of ones, - // but it's a double matrix because it's going to be multiplied a double matrix. + // but it's a double matrix because it's going to be multiplied by a double matrix (nodal coordinates). TriDblCrsMatrix missingNodesIndicator( missingNodesMap, 1 ); for( std::size_t i = 0; i < numLocMissingNodes; ++i ) { @@ -972,6 +1185,8 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & // - Its number of columns is 3: the x, y, and z coordinates of the nodes. TriDblCrsMatrix nodePositions( ownedNodesMap, 3 ); std::vector< TriGlbIdx > const zot{ TriGlbIdx( 0 ), TriGlbIdx( 1 ), TriGlbIdx( 2 ) }; // zot: zero, one, two. + + // each rank fills the nodePositions for the nodes it owns for( auto const & [ngi, pos]: owned.n2pos ) { nodePositions.insertGlobalValues( convert.fromNodeGlbIdx( ngi ), TriLocIdx( 3 ), pos.data(), zot.data() ); @@ -980,22 +1195,36 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & nodePositions.fillComplete( threeMap, ownedNodesMap ); // `missingNodePos` will contain the missing node positions. - TriDblCrsMatrix missingNodePos( missingNodesMap, 1000 ); // TODO having `0` there should be working! + // Analogous to missingIndices, each row is a node missing from one of the ranks, and the cols are the position of that node + TriDblCrsMatrix missingNodePos( missingNodesMap, 1000 ); // TODO having `0` there should be working! Same TODO for sizing Tpetra::MatrixMatrix::Multiply( missingNodesIndicator, false, nodePositions, false, missingNodePos, false ); missingNodePos.fillComplete( threeMap, missingNodesMap ); + // Okie dokie, with all that we can finally construct the last output, which is the MeshGraph corresponding to ghosted info + // (recall we already had MeshGraphs for the owned and present info) + // (and recall a MeshGraph is just a struct of maps which take you from node/edge/face/cell global index to its data, + // ex: face global id maps to a vector of edges, each of which are encoded by EdgeInfo (so it has id, start node)) + // Notice that this information is exactly what is in missingIndices and missingNodePos! + // Moreoever, we will be able to get back 'all' the information by decoding the entries of missingIndices! MeshGraph ghosts; Teuchos::Array< double > extractedNodePos( 3 ); + + // Loop over the missing mesh entities for( std::size_t i = 0; i < numLocMissingIndices; ++i ) { + // This is the missing entity that will be ghosted, this will be the key in one of the ghosted maps TriGlbIdx const index = notPresentIndices[i]; Geom const geometricalType = convert.getGeometricalType( index ); + // get the number of entities the missing one depends on (downward, c -> f -> e -> n) std::size_t const length = missingIndices.getNumEntriesInGlobalRow( missingIndicesOffset + TriGlbIdx( i ) ); + + // reuse same temp vars from before to store the row data extractedValues.resize( length ); extractedIndices.resize( length ); missingIndices.getGlobalRowCopy( missingIndicesOffset + TriGlbIdx( i ), extractedIndices(), extractedValues(), extracted ); GEOS_ASSERT_EQ( extracted, length ); + if( geometricalType == Geom::NODE ) { // The case of nodes is a bit different from the other cases, @@ -1016,32 +1245,40 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & continue; } - auto const cit = std::find( std::cbegin( extractedIndices ), std::cend( extractedIndices ), index ); + // Now we are either edge, face, or cell + + // Find the index of the current missing entity in the data from the row of the matrix + auto const cit = std::find( std::cbegin( extractedIndices ), std::cend( extractedIndices ), index ); // I kind of think it should always be the last one, since upward is (lower) triangular + // Get the diagonal value std::size_t const numGeomQuantitiesIdx = intConv< std::size_t >( std::distance( std::cbegin( extractedIndices ), cit ) ); TriScalarInt const & numGeomQuantities = extractedValues[numGeomQuantitiesIdx]; - GEOS_ASSERT_EQ( extracted, intConv< std::size_t >( numGeomQuantities + 1 ) ); + GEOS_ASSERT_EQ( extracted, intConv< std::size_t >( numGeomQuantities + 1 ) ); // extracted is the output from Tilinos telling you how many entries were pulled out when we copy row switch( geometricalType ) { case Geom::EDGE: { TriScalarInt const & numNodes = numGeomQuantities; // Alias - GEOS_ASSERT_EQ( numNodes, 2 ); + GEOS_ASSERT_EQ( numNodes, 2 ); // an edge always has 2 nodes std::array< NodeGlbIdx, 2 > order{}; + // loop over the entries extracted from the row of the matrix for( std::size_t ii = 0; ii < extracted; ++ii ) { + // skip diagonal if( ii == numGeomQuantitiesIdx ) { continue; } + // use `order` to store [start node index, end node index] NodeGlbIdx const ngi = convert.toNodeGlbIdx( extractedIndices[ii] ); TriScalarInt const ord = extractedValues[ii] - 1; GEOS_ASSERT( ord == 0 or ord == 1 ); order[ord] = ngi; } GEOS_ASSERT_EQ( std::size( order ), intConv< std::size_t >( numNodes ) ); + // populate this info into ghosts EdgeGlbIdx const egi = convert.toEdgeGlbIdx( index ); std::tuple< NodeGlbIdx, NodeGlbIdx > & tmp = ghosts.e2n[egi]; std::get< 0 >( tmp ) = order[0]; @@ -1052,19 +1289,27 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & { TriScalarInt const & numEdges = numGeomQuantities; // Alias std::map< integer, EdgeInfo > order; + + // loop over the entries extracted from the row of the matrix for( std::size_t ii = 0; ii < extracted; ++ii ) { + // skip diagonal if( ii == numGeomQuantitiesIdx ) { continue; } + // get the edge global index and decode the matrix entry to get each edge's start node and the edges index in the face EdgeGlbIdx const egi = convert.toEdgeGlbIdx( extractedIndices[ii] ); + // array is [start node, index within face] std::array< std::size_t, 2 > const decoded = decode< 2 >( numEdges, extractedValues[ii] - 1 ); + // `order` holds all the edge infos in the order the edges appeared in the original face order[decoded[1]] = { egi, intConv< std::uint8_t >( decoded[0] ) }; GEOS_ASSERT( decoded[0] == 0 or decoded[0] == 1 ); } GEOS_ASSERT_EQ( std::size( order ), intConv< std::size_t >( numEdges ) ); + + // use face global index and `order` to populate ghosts FaceGlbIdx const fgi = convert.toFaceGlbIdx( index ); std::vector< EdgeInfo > & tmp = ghosts.f2e[fgi]; tmp.resize( numEdges ); @@ -1079,18 +1324,26 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & { TriScalarInt const & numFaces = numGeomQuantities; // Alias // TODO This should receive the cell type instead. std::map< integer, FaceInfo > order; + + // loop over the entries extracted from the row of the matrix for( std::size_t ii = 0; ii < extracted; ++ii ) { + // skip diagonal if( ii == numGeomQuantitiesIdx ) { continue; } + // get the face global index and decode the entries to get isFlipped, start index, and index of face within cell FaceGlbIdx const fgi = convert.toFaceGlbIdx( extractedIndices[ii] ); + // array is [isFlipped, start index, and index of face within cell] std::array< std::size_t, 3 > const decoded = decode< 3 >( numFaces, extractedValues[ii] - 1 ); + // `order` holds the face info in the order the faces appeared in the original cell order[decoded[2]] = { fgi, intConv< bool >( decoded[0] ), intConv< std::uint8_t >( decoded[1] ) }; } GEOS_ASSERT_EQ( std::size( order ), intConv< std::size_t >( numFaces ) ); + + // use cell global index and `order` to populate ghosts CellGlbIdx const cgi = convert.toCellGlbIdx( index ); std::vector< FaceInfo > & tmp = ghosts.c2f[cgi]; tmp.resize( numFaces ); @@ -1102,7 +1355,7 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & } default: { - GEOS_ERROR( "Internal error." ); + GEOS_ERROR( "Internal error in performGhosting. Could not recognize geometric type of mesh entity" ); } } } @@ -1127,6 +1380,7 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & // GEOS_LOG_RANK( "ghosts_n2ps = " << json( ghosts.n2pos ) ); // } + // Now we are done!!! return { ghosts, recv, send }; } @@ -1134,6 +1388,11 @@ void doTheNewGhosting( vtkSmartPointer< vtkDataSet > mesh, std::set< MpiRank > const & neighbors, MeshMappingImpl & meshMappings ) { + + // First step in ghosting is to have each rank create the global IDs for each entry appearing on it + // buckets is a group of maps from sets of mpi ranks to shared (nodes, edges, faces) + // offsets has the same keys but the values are now the global index for the first entry in the bucket + // the rest of the items in a bucket are numered sequentially following the value of offset auto const [buckets, offsets] = doTheNewGlobalNumbering( mesh, neighbors ); // Now we exchange the data with our neighbors. @@ -1141,10 +1400,20 @@ void doTheNewGhosting( vtkSmartPointer< vtkDataSet > mesh, GEOS_LOG_RANK( "offsets on rank " << curRank << " -> " << json( offsets ) ); + // Now we grab the biggest global indices for each (nodes, edges, faces, cells) as this will help build the adjacency matrix + // gatherOffset does an allReduce of max indices on each rank + // note we use the extra entry in the offsets for the next rank which specifies where it starts to get the max index on this rank + // so we dont have to do any extra counting or anything in the current rank buckets MpiRank const nextRank = curRank + 1_mpi; MaxGlbIdcs const matrixOffsets = gatherOffset( mesh, offsets.edges.at( { nextRank } ) - 1_egi, offsets.faces.at( { nextRank } ) - 1_fgi ); std::cout << "matrixOffsets on rank " << curRank << " -> " << json( matrixOffsets ) << std::endl; + // Now we build to data to describe the mesh data on the current rank. + // owned and present are struct of maps which map: + // cell global index to vector of entries of type (face global index, extra stuff defining orientation of face) for each of the adjacent faces + // face global index to vector of entries of type (edge global index plus orientation) for each of the adjacent edges + // edge global index to tuple of node global indices + // node global index to 3d array of position auto const [owned, present] = buildMeshGraph( mesh, buckets, offsets, curRank ); // TODO change into buildOwnedMeshGraph? // if( curRank == 1_mpi ) // { @@ -1153,8 +1422,17 @@ void doTheNewGhosting( vtkSmartPointer< vtkDataSet > mesh, // } // MpiWrapper::barrier(); + // Next do the actual ghosting - i.e. figure out the information each rank must send to and receive from other ranks + // This is where we use parallel linear algebra package (right now, Trilinos) to build an adjacency matrix for the entire mesh (treated as a graph) + // By doing some clever matrix products we can figure out which ranks need what automagically + // `ghosts` is also a MeshGraph like owned and present, but described the geometric entities which are ghosted onto the rank from another owner + // recv and send are of type GhostRecv and GhostSend (in BuildPods.hpp) and describe what (nodes, edges, faces, cells) each rank needs to receive from and send to others auto const [ghosts, recv, send] = performGhosting( owned, present, matrixOffsets, curRank ); + // Finally, we use everything we have to populate the mappings that interface with the rest of GEOS + // Note that we have already done the ghosting, so these mappings are set once and for all + // Unlike previous implementation, where we populated these based on the current rank info, and then tried to fix any inconsistencies with ghosting + // output is the populated meshMappings, which contains node manager, edge manager, etc. as well as the ghost send/recv buildPods( owned, present, ghosts, recv, send, meshMappings ); } diff --git a/src/coreComponents/mesh/generators/NewGlobalNumbering.cpp b/src/coreComponents/mesh/generators/NewGlobalNumbering.cpp index 62d6c34e61d..e9fc77c6cdb 100644 --- a/src/coreComponents/mesh/generators/NewGlobalNumbering.cpp +++ b/src/coreComponents/mesh/generators/NewGlobalNumbering.cpp @@ -31,583 +31,637 @@ namespace geos::ghosting { -struct Exchange -{ - std::set< NodeGlbIdx > nodes; - std::set< Edge > edges; - std::set< Face > faces; -}; - -void to_json( json & j, - const Exchange & v ) -{ - j = json{ { "nodes", v.nodes }, - { "edges", v.edges }, - { "faces", v.faces } }; -} + struct Exchange + { + std::set nodes; + std::set edges; + std::set faces; + }; -void from_json( const json & j, - Exchange & v ) -{ - v.nodes = j.at( "nodes" ).get< std::set< NodeGlbIdx > >(); - v.edges = j.at( "edges" ).get< std::set< Edge > >(); - v.faces = j.at( "faces" ).get< std::set< Face > >(); -} + void to_json(json &j, + const Exchange &v) + { + j = json{{"nodes", v.nodes}, + {"edges", v.edges}, + {"faces", v.faces}}; + } -array1d< std::uint8_t > convertExchange( Exchange const & exchange ) -{ - std::vector< std::uint8_t > const tmp = json::to_cbor( json( exchange ) ); - array1d< std::uint8_t > result; - result.reserve( tmp.size() ); - for( std::uint8_t const & t: tmp ) + void from_json(const json &j, + Exchange &v) { - result.emplace_back( t ); + v.nodes = j.at("nodes").get>(); + v.edges = j.at("edges").get>(); + v.faces = j.at("faces").get>(); } - return result; -} -Exchange convertExchange( array1d< std::uint8_t > const & input ) -{ - std::vector< std::uint8_t > const tmp( std::cbegin( input ), std::cend( input ) ); - return json::from_cbor( tmp, false ).template get< Exchange >(); -} - -/** - * @brief Extract the cells at the boundary of the mesh. - * @param mesh The vtk mesh. - * @return The vtk cell ids. - */ -std::set< vtkIdType > extractBoundaryCells( vtkSmartPointer< vtkDataSet > mesh ) -{ - // TODO Better handle the boundary information, forgetting about the 3d cells and simply handling the outside shell. - auto f = vtkDataSetSurfaceFilter::New(); - f->PassThroughCellIdsOn(); - f->PassThroughPointIdsOff(); - f->FastModeOff(); - - string const originalCellsKey = "ORIGINAL_CELLS"; - f->SetOriginalCellIdsName( originalCellsKey.c_str() ); - auto boundaryMesh = vtkPolyData::New(); - f->UnstructuredGridExecute( mesh, boundaryMesh ); - vtkIdTypeArray const * originalCells = vtkIdTypeArray::FastDownCast( boundaryMesh->GetCellData()->GetArray( originalCellsKey.c_str() ) ); - - std::set< vtkIdType > boundaryCellIdxs; - for( auto i = 0; i < originalCells->GetNumberOfTuples(); ++i ) + array1d convertExchange(Exchange const &exchange) { - boundaryCellIdxs.insert( originalCells->GetValue( i ) ); + std::vector const tmp = json::to_cbor(json(exchange)); + array1d result; + result.reserve(tmp.size()); + for (std::uint8_t const &t : tmp) + { + result.emplace_back(t); + } + return result; } - return boundaryCellIdxs; -} + Exchange convertExchange(array1d const &input) + { + std::vector const tmp(std::cbegin(input), std::cend(input)); + return json::from_cbor(tmp, false).template get(); + } -Face reorderFaceNodes( std::vector< NodeGlbIdx > const & nodes, bool & isFlipped, std::uint8_t & start ) // TODO unit test -{ - std::size_t const n = nodes.size(); + /** + * @brief Extract the cells at the boundary of the mesh. + * @param mesh The vtk mesh. + * @return The vtk cell ids. + */ + std::set extractBoundaryCells(vtkSmartPointer mesh) + { + // TODO Better handle the boundary information, forgetting about the 3d cells and simply handling the outside shell. + auto f = vtkDataSetSurfaceFilter::New(); + f->PassThroughCellIdsOn(); + f->PassThroughPointIdsOff(); + f->FastModeOff(); + + string const originalCellsKey = "ORIGINAL_CELLS"; + f->SetOriginalCellIdsName(originalCellsKey.c_str()); + auto boundaryMesh = vtkPolyData::New(); + f->UnstructuredGridExecute(mesh, boundaryMesh); + vtkIdTypeArray const *originalCells = vtkIdTypeArray::FastDownCast(boundaryMesh->GetCellData()->GetArray(originalCellsKey.c_str())); + + std::set boundaryCellIdxs; + for (auto i = 0; i < originalCells->GetNumberOfTuples(); ++i) + { + boundaryCellIdxs.insert(originalCells->GetValue(i)); + } - std::vector< NodeGlbIdx > f( nodes ); + return boundaryCellIdxs; + } + // Reorder nodes by starting with the smallest node index, and then iterating in the direction of the smaller of its neighbors + Face reorderFaceNodes(std::vector const &nodes, bool &isFlipped, std::uint8_t &start) // TODO unit test + { + std::size_t const n = nodes.size(); - auto const cit = std::min_element( std::cbegin( nodes ), std::cend( nodes ) ); - auto const minIdx = std::distance( std::cbegin( nodes ), cit ); + std::vector f(nodes); - std::size_t const prevIdx = minIdx == 0 ? n - 1 : minIdx - 1; - std::size_t const nextIdx = minIdx == intConv< int >( n ) - 1 ? 0 : minIdx + 1; + // Get the smallest global node Id in the face + auto const cit = std::min_element(std::cbegin(nodes), std::cend(nodes)); + auto const minIdx = std::distance(std::cbegin(nodes), cit); - start = intConv< std::uint8_t >( minIdx ); - isFlipped = nodes[prevIdx] < nodes[nextIdx]; + // get the adjecent node gloabl IDs in he vector (treated as a circular buffer) + std::size_t const prevIdx = minIdx == 0 ? n - 1 : minIdx - 1; + std::size_t const nextIdx = minIdx == intConv(n) - 1 ? 0 : minIdx + 1; - std::size_t const pivot = isFlipped ? n - 1 - minIdx : minIdx; - if( isFlipped ) - { - std::reverse( std::begin( f ), std::end( f ) ); - } - std::rotate( std::begin( f ), std::begin( f ) + pivot, std::end( f ) ); + // we will order by starting with the smallest ID and then iterating in the direction of its smaller neighbor + // this is encoded by isFlipped boolean (true if we iterate "left" from the start) + start = intConv(minIdx); + isFlipped = nodes[prevIdx] < nodes[nextIdx]; - return f; -} + // return the re-ordered nodes vector, isFlipped and start have also been set by this function + std::size_t const pivot = isFlipped ? n - 1 - minIdx : minIdx; + if (isFlipped) + { + std::reverse(std::begin(f), std::end(f)); + } + std::rotate(std::begin(f), std::begin(f) + pivot, std::end(f)); -std::vector< NodeLocIdx > resetFaceNodes( std::vector< NodeLocIdx > const & nodes, - bool const & isFlipped, - std::uint8_t const & start ) -{ - std::vector< NodeLocIdx > result( nodes ); - if( isFlipped ) - { - std::reverse( std::begin( result ), std::end( result ) ); - std::uint8_t const pivot = std::size( result ) - 1 - start; - std::rotate( std::begin( result ), std::begin( result ) + pivot, std::end( result ) ); + return f; } - else - { - std::rotate( std::rbegin( result ), std::rbegin( result ) + start, std::rend( result ) ); - } - return result; -} - -Exchange buildSpecificData( vtkSmartPointer< vtkDataSet > mesh, - std::set< vtkIdType > const & cellIds ) -{ - vtkIdTypeArray * gids = vtkIdTypeArray::FastDownCast( mesh->GetPointData()->GetGlobalIds() ); - Span< vtkIdType > const globalPtIds( (vtkIdType *) gids->GetPointer( 0 ), gids->GetNumberOfTuples() ); - - std::vector< Edge > tmpEdges; - std::vector< Face > tmpFaces; - // Pre-allocation of the temporary vectors. + std::vector resetFaceNodes(std::vector const &nodes, + bool const &isFlipped, + std::uint8_t const &start) { - std::size_t numEdges = 0; - std::size_t numFaces = 0; - for( vtkIdType const & c: cellIds ) + std::vector result(nodes); + if (isFlipped) + { + std::reverse(std::begin(result), std::end(result)); + std::uint8_t const pivot = std::size(result) - 1 - start; + std::rotate(std::begin(result), std::begin(result) + pivot, std::end(result)); + } + else { - vtkCell * cell = mesh->GetCell( c ); - numEdges += cell->GetNumberOfEdges(); - numFaces += cell->GetNumberOfFaces(); + std::rotate(std::rbegin(result), std::rbegin(result) + start, std::rend(result)); } - tmpEdges.reserve( numEdges ); - tmpFaces.reserve( numFaces ); + return result; } - // Filling the temporary. - for( auto c = 0; c < mesh->GetNumberOfCells(); ++c ) + Exchange buildSpecificData(vtkSmartPointer mesh, + std::set const &cellIds) { - vtkCell * cell = mesh->GetCell( c ); + vtkIdTypeArray *gids = vtkIdTypeArray::FastDownCast(mesh->GetPointData()->GetGlobalIds()); + Span const globalPtIds((vtkIdType *)gids->GetPointer(0), gids->GetNumberOfTuples()); - for( auto e = 0; e < cell->GetNumberOfEdges(); ++e ) - { - vtkCell * edge = cell->GetEdge( e ); - vtkIdType const nli0 = edge->GetPointId( 0 ); - vtkIdType const nli1 = edge->GetPointId( 1 ); - vtkIdType const ngi0 = globalPtIds[nli0]; - vtkIdType const ngi1 = globalPtIds[nli1]; - tmpEdges.emplace_back( std::minmax( { NodeGlbIdx{ ngi0 }, NodeGlbIdx{ ngi1 } } ) ); - } + std::vector tmpEdges; + std::vector tmpFaces; - for( auto f = 0; f < cell->GetNumberOfFaces(); ++f ) + // Pre-allocation of the temporary vectors. { - vtkCell * face = cell->GetFace( f ); - vtkIdList * pids = face->GetPointIds(); - std::vector< NodeGlbIdx > nodes( pids->GetNumberOfIds() ); - for( std::size_t i = 0; i < nodes.size(); ++i ) + std::size_t numEdges = 0; + std::size_t numFaces = 0; + for (vtkIdType const &c : cellIds) { - vtkIdType const nli = face->GetPointId( i ); - vtkIdType const ngi = globalPtIds[nli]; - nodes[i] = NodeGlbIdx{ ngi }; + vtkCell *cell = mesh->GetCell(c); + numEdges += cell->GetNumberOfEdges(); + numFaces += cell->GetNumberOfFaces(); } - bool isFlipped; - std::uint8_t start; - tmpFaces.emplace_back( reorderFaceNodes( nodes, isFlipped, start ) ); + tmpEdges.reserve(numEdges); + tmpFaces.reserve(numFaces); } - } - std::set< NodeGlbIdx > nodes; - std::transform( std::cbegin( globalPtIds ), std::cend( globalPtIds ), - std::inserter( nodes, std::end( nodes ) ), []( vtkIdType const & id ) - { return NodeGlbIdx{ id }; } ); - - // Removing the duplicates by copying into a `std::set`. - std::set< Edge > edges{ tmpEdges.cbegin(), tmpEdges.cend() }; // SortedArray requires the input to be sorted already. - std::set< Face > faces{ tmpFaces.cbegin(), tmpFaces.cend() }; + // Filling the temporary. + for (auto c = 0; c < mesh->GetNumberOfCells(); ++c) + { + vtkCell *cell = mesh->GetCell(c); - return { std::move( nodes ), std::move( edges ), std::move( faces ) }; -} + for (auto e = 0; e < cell->GetNumberOfEdges(); ++e) + { + vtkCell *edge = cell->GetEdge(e); + vtkIdType const nli0 = edge->GetPointId(0); + vtkIdType const nli1 = edge->GetPointId(1); + vtkIdType const ngi0 = globalPtIds[nli0]; + vtkIdType const ngi1 = globalPtIds[nli1]; + tmpEdges.emplace_back(std::minmax({NodeGlbIdx{ngi0}, NodeGlbIdx{ngi1}})); + } + for (auto f = 0; f < cell->GetNumberOfFaces(); ++f) + { + vtkCell *face = cell->GetFace(f); + vtkIdList *pids = face->GetPointIds(); + std::vector nodes(pids->GetNumberOfIds()); + for (std::size_t i = 0; i < nodes.size(); ++i) + { + vtkIdType const nli = face->GetPointId(i); + vtkIdType const ngi = globalPtIds[nli]; + nodes[i] = NodeGlbIdx{ngi}; + } + bool isFlipped; + std::uint8_t start; + tmpFaces.emplace_back(reorderFaceNodes(nodes, isFlipped, start)); + } + } -Exchange buildFullData( vtkSmartPointer< vtkDataSet > mesh ) -{ - std::set< vtkIdType > cellIds; - for( vtkIdType i = 0; i < mesh->GetNumberOfCells(); ++i ) - { - cellIds.insert( cellIds.end(), i ); - } + std::set nodes; + std::transform(std::cbegin(globalPtIds), std::cend(globalPtIds), + std::inserter(nodes, std::end(nodes)), [](vtkIdType const &id) + { return NodeGlbIdx{id}; }); - return buildSpecificData( mesh, cellIds ); -} + // Removing the duplicates by copying into a `std::set`. + std::set edges{tmpEdges.cbegin(), tmpEdges.cend()}; // SortedArray requires the input to be sorted already. + std::set faces{tmpFaces.cbegin(), tmpFaces.cend()}; -Exchange buildExchangeData( vtkSmartPointer< vtkDataSet > mesh ) -{ - return buildSpecificData( mesh, extractBoundaryCells( mesh ) ); -} - -/** - * @brief - * @tparam GLB_IDX The global index type of the geometrical quantity considered (typically @c EdgeGlbIdx or @c FaceGlbIdx). - * @param sizes The sizes of the intersection bucket (from the current MPI rank). - * @param offsets The offsets for each intersection bucket (from the MPI_Scan process, i.e. the previous ranks). - * @param curRank Current MPI rank. - * @return The offset buckets, updated with the sizes of the current MPI rank. - */ -template< typename GLB_IDX > -std::map< std::set< MpiRank >, GLB_IDX > updateBucketOffsets( std::map< std::set< MpiRank >, localIndex > const & sizes, - std::map< std::set< MpiRank >, GLB_IDX > const & offsets, - MpiRank curRank ) -{ - std::map< std::set< MpiRank >, GLB_IDX > reducedOffsets; + return {std::move(nodes), std::move(edges), std::move(faces)}; + } - // We only keep the offsets that are still relevant to the current and higher ranks. - // Note that `reducedOffsets` will be used by the _current_ rank too. - // Therefore, we'll send information that may not be useful to higher ranks. - // This is surely acceptable because the information is tiny. - for( auto const & [ranks, offset]: offsets ) + Exchange buildFullData(vtkSmartPointer mesh) { - MpiRank const maxConcerned = *std::max_element( ranks.cbegin(), ranks.cend() ); - // TODO invert -// if( maxConcerned >= curRank ) -// { -// reducedOffsets.emplace_hint( reducedOffsets.end(), ranks, offset ); -// } - if( maxConcerned < curRank ) + std::set cellIds; + for (vtkIdType i = 0; i < mesh->GetNumberOfCells(); ++i) { - continue; + cellIds.insert(cellIds.end(), i); } - reducedOffsets.emplace_hint( reducedOffsets.end(), ranks, offset ); + + return buildSpecificData(mesh, cellIds); + } + + Exchange buildExchangeData(vtkSmartPointer mesh) + { + return buildSpecificData(mesh, extractBoundaryCells(mesh)); } - // Add the offsets associated to the new size buckets from the current rank. - GLB_IDX nextOffset{ 0 }; - for( auto const & [ranks, size]: sizes ) + /** + * @brief + * @tparam GLB_IDX The global index type of the geometrical quantity considered (typically @c EdgeGlbIdx or @c FaceGlbIdx). + * @param sizes The sizes of the intersection bucket (from the current MPI rank). + * @param offsets The offsets for each intersection bucket (from the MPI_Scan process, i.e. the previous ranks). + * @param curRank Current MPI rank. + * @return The offset buckets, updated with the sizes of the current MPI rank. + */ + template + std::map, GLB_IDX> updateBucketOffsets(std::map, localIndex> const &sizes, + std::map, GLB_IDX> const &offsets, + MpiRank curRank) { - auto const it = reducedOffsets.find( ranks ); - if( it == reducedOffsets.end() ) + std::map, GLB_IDX> reducedOffsets; + + // We only keep the offsets that are still relevant to the current and higher ranks. + // Note that `reducedOffsets` will be used by the _current_ rank too. + // Therefore, we'll send information that may not be useful to higher ranks. + // This is surely acceptable because the information is tiny. + // We do this by looping through the buckets, finding the beggest rank, and then only adding the bucket and its offset if biggest rank is >= current rank + for (auto const &[ranks, offset] : offsets) { - reducedOffsets.emplace_hint( reducedOffsets.end(), ranks, nextOffset ); - nextOffset += GLB_IDX{ size }; + MpiRank const maxConcerned = *std::max_element(ranks.cbegin(), ranks.cend()); + // TODO invert + // if( maxConcerned >= curRank ) + // { + // reducedOffsets.emplace_hint( reducedOffsets.end(), ranks, offset ); + // } + if (maxConcerned < curRank) + { + continue; + } + reducedOffsets.emplace_hint(reducedOffsets.end(), ranks, offset); } - else + + // Add the offsets associated to the new size buckets from the current rank. + GLB_IDX nextOffset{0}; + // Loop through th buckets in either (edges, faces) + for (auto const &[ranks, size] : sizes) { - nextOffset = it->second + GLB_IDX{ size }; // Define the new offset from the last + // See if this bucket was in the previously indexed data + // if so, then we just need to update the nextOffset, which will be used for the next new data + // if the data is new, then we add its offset to the set of indexed data in reducedOffsets + auto const it = reducedOffsets.find(ranks); + if (it == reducedOffsets.end()) + { + reducedOffsets.emplace_hint(reducedOffsets.end(), ranks, nextOffset); + nextOffset += GLB_IDX{size}; + } + else + { + nextOffset = it->second + GLB_IDX{size}; // Define the new offset from the last + } } - } - // Add an extra entry based for the following rank - reducedOffsets.emplace_hint( reducedOffsets.end(), std::set< MpiRank >{ curRank + 1_mpi }, nextOffset ); + // Add an extra entry based for the following rank + // There are edge cases where a rank cannot start counting without this + reducedOffsets.emplace_hint(reducedOffsets.end(), std::set{curRank + 1_mpi}, nextOffset); - return reducedOffsets; -} + return reducedOffsets; + } -// TODO Duplicated -std::map< MpiRank, Exchange > exchange( int commId, - std::vector< NeighborCommunicator > & neighbors, - Exchange const & data ) -{ - MPI_iCommData commData( commId ); - integer const numNeighbors = LvArray::integerConversion< integer >( neighbors.size() ); - commData.resize( numNeighbors ); + // TODO Duplicated + std::map exchange(int commId, + std::vector &neighbors, + Exchange const &data) + { + MPI_iCommData commData(commId); + integer const numNeighbors = LvArray::integerConversion(neighbors.size()); + commData.resize(numNeighbors); + array1d const cv = convertExchange(data); - array1d< std::uint8_t > const cv = convertExchange( data ); + for (integer i = 0; i < numNeighbors; ++i) + { + neighbors[i].mpiISendReceiveSizes(cv, + commData.mpiSendBufferSizeRequest(i), + commData.mpiRecvBufferSizeRequest(i), + commId, + MPI_COMM_GEOSX); + } - for( integer i = 0; i < numNeighbors; ++i ) - { - neighbors[i].mpiISendReceiveSizes( cv, - commData.mpiSendBufferSizeRequest( i ), - commData.mpiRecvBufferSizeRequest( i ), - commId, - MPI_COMM_GEOSX ); - } + MpiWrapper::waitAll(numNeighbors, commData.mpiSendBufferSizeRequest(), commData.mpiSendBufferSizeStatus()); + MpiWrapper::waitAll(numNeighbors, commData.mpiRecvBufferSizeRequest(), commData.mpiRecvBufferSizeStatus()); - MpiWrapper::waitAll( numNeighbors, commData.mpiSendBufferSizeRequest(), commData.mpiSendBufferSizeStatus() ); - MpiWrapper::waitAll( numNeighbors, commData.mpiRecvBufferSizeRequest(), commData.mpiRecvBufferSizeStatus() ); + array1d> rawExchanged(neighbors.size()); - array1d< array1d< std::uint8_t > > rawExchanged( neighbors.size() ); + for (integer i = 0; i < numNeighbors; ++i) + { + neighbors[i].mpiISendReceiveData(cv, + commData.mpiSendBufferRequest(i), + rawExchanged[i], + commData.mpiRecvBufferRequest(i), + commId, + MPI_COMM_GEOSX); + } + MpiWrapper::waitAll(numNeighbors, commData.mpiSendBufferRequest(), commData.mpiSendBufferStatus()); + MpiWrapper::waitAll(numNeighbors, commData.mpiRecvBufferRequest(), commData.mpiRecvBufferStatus()); - for( integer i = 0; i < numNeighbors; ++i ) - { - neighbors[i].mpiISendReceiveData( cv, - commData.mpiSendBufferRequest( i ), - rawExchanged[i], - commData.mpiRecvBufferRequest( i ), - commId, - MPI_COMM_GEOSX ); + std::map output; + for (auto i = 0; i < numNeighbors; ++i) + { + output[MpiRank{neighbors[i].neighborRank()}] = convertExchange(rawExchanged[i]); + } + return output; } - MpiWrapper::waitAll( numNeighbors, commData.mpiSendBufferRequest(), commData.mpiSendBufferStatus() ); - MpiWrapper::waitAll( numNeighbors, commData.mpiRecvBufferRequest(), commData.mpiRecvBufferStatus() ); - std::map< MpiRank, Exchange > output; - for( auto i = 0; i < numNeighbors; ++i ) - { - output[MpiRank{ neighbors[i].neighborRank() }] = convertExchange( rawExchanged[i] ); - } - return output; -} - -/** - * @brief - * @tparam T Typically NodeGloIdx or a container of NodeGlbIdx (for edges and faces) - * @param exchanged - * @param curRank - * @param neighbors - * @return - */ -template< typename T > -std::map< std::set< MpiRank >, std::set< T > > -buildIntersectionBuckets( std::map< MpiRank, std::set< T > const & > const & exchanged, - MpiRank curRank, - std::set< MpiRank > const & neighbors ) -{ - std::map< T, std::set< MpiRank > > counts; // TODO Use better intersection algorithms? - // We "register" all the edges of the current rank: they are the only one we're interested in. - for( T const & node: exchanged.at( curRank ) ) + /** + * @brief + * @tparam T Typically NodeGloIdx or a container of NodeGlbIdx (for edges and faces) + * @param exchanged + * @param curRank + * @param neighbors + * @return + */ + template + std::map, std::set> + buildIntersectionBuckets(std::map const &> const &exchanged, + MpiRank curRank, + std::set const &neighbors) { - counts.emplace_hint( counts.end(), node, std::set< MpiRank >{ curRank } ); - } + // counts - which ranks use each (node, edge, face) + std::map> counts; // TODO Use better intersection algorithms? - // We now loop on the neighbor edges. - // If a neighbor has an edge in common with the current rank, they we store it. - for( MpiRank const & neighborRank: neighbors ) // This does not include the current rank. - { - for( T const & node: exchanged.at( neighborRank ) ) + // We "register" all the edges of the current rank: they are the only one we're interested in. + // i.e. now exchanged is just one of (nodes, edges, faces), we add keys for each of the (nodes, edges, faces) + // to counts and say it is used by the current rank + for (T const &node : exchanged.at(curRank)) { - auto it = counts.find( node ); - if( it != counts.cend() ) // TODO Extract `counts.cend()` out of the loop. + counts.emplace_hint(counts.end(), node, std::set{curRank}); + } + + // We now loop on the neighbor edges. + // If a neighbor has an edge in common with the current rank, they we store it. + for (MpiRank const &neighborRank : neighbors) // This does not include the current rank. + { + // for each of the (nodes, edges, faces) on the boundary of the other ranks, + // add that that entry is also used by the neighbor rank + for (T const &node : exchanged.at(neighborRank)) { - it->second.insert( neighborRank ); + auto it = counts.find(node); + if (it != counts.cend()) // TODO Extract `counts.cend()` out of the loop. + { + it->second.insert(neighborRank); + } } } - } - std::map< std::set< MpiRank >, std::set< T > > nodeBuckets; - for( auto const & [node, ranks]: counts ) - { - if( ranks.find( curRank ) != ranks.cend() ) + // Counts is essentially the inverse relationship to what we want + // now invert so that we have keys as set of MPI ranks sharing and value as the (node, edge, face) + std::map, std::set> nodeBuckets; + for (auto const &[node, ranks] : counts) { - nodeBuckets[ranks].insert( node ); + if (ranks.find(curRank) != ranks.cend()) + { + nodeBuckets[ranks].insert(node); + } } + return nodeBuckets; } - return nodeBuckets; -} - -/** - * @brief Compute the intersection between for the ranks based on the information @p exchanged. - * @param exchanged Geometrical information provided by the ranks (including the current rank). - * @param curRank The current MPI rank. - * @param neighbors excluding current rank - * @return The intersection buckets. - */ -Buckets buildIntersectionBuckets( std::map< MpiRank, Exchange > const & exchanged, - MpiRank curRank, - std::set< MpiRank > const & neighbors ) -{ - std::map< MpiRank, std::set< NodeGlbIdx > const & > nodeInfo; - for( auto const & [rank, exchange]: exchanged ) - { - nodeInfo.emplace( rank, exchange.nodes ); - } - std::map< std::set< MpiRank >, std::set< NodeGlbIdx > > const nodeBuckets = buildIntersectionBuckets( nodeInfo, curRank, neighbors ); - std::map< MpiRank, std::set< Edge > const & > edgeInfo; - for( auto const & [rank, exchange]: exchanged ) + /** + * @brief Compute the intersection between for the ranks based on the information @p exchanged. + * @param exchanged Geometrical information provided by the ranks (including the current rank). + * @param curRank The current MPI rank. + * @param neighbors excluding current rank + * @return The intersection buckets. + */ + Buckets buildIntersectionBuckets(std::map const &exchanged, + MpiRank curRank, + std::set const &neighbors) { - edgeInfo.emplace( rank, exchange.edges ); - } - std::map< std::set< MpiRank >, std::set< Edge > > const edgeBuckets = buildIntersectionBuckets( edgeInfo, curRank, neighbors ); + // create storage for the set of nodes owned by each rank, then fill using exchange info + // this will be passed to function which computes intersections + std::map const &> nodeInfo; + for (auto const &[rank, exchange] : exchanged) + { + nodeInfo.emplace(rank, exchange.nodes); + } + std::map, std::set> const nodeBuckets = buildIntersectionBuckets(nodeInfo, curRank, neighbors); - // For faces, the algorithm can be a tad simpler because faces can be shared by at most 2 ranks. - std::map< std::set< MpiRank >, std::set< Face > > faceBuckets; - std::set< Face > curFaces = exchanged.at( curRank ).faces; - for( MpiRank const & neighborRank: neighbors ) // This does not include the current rank. - { - for( Face const & face: exchanged.at( neighborRank ).faces ) + // Repeat the process for the edges + std::map const &> edgeInfo; + for (auto const &[rank, exchange] : exchanged) { - auto it = curFaces.find( face ); - if( it != curFaces.cend() ) + edgeInfo.emplace(rank, exchange.edges); + } + std::map, std::set> const edgeBuckets = buildIntersectionBuckets(edgeInfo, curRank, neighbors); + + // For faces, the algorithm can be a tad simpler because faces can be shared by at most 2 ranks. + std::map, std::set> faceBuckets; + std::set curFaces = exchanged.at(curRank).faces; + for (MpiRank const &neighborRank : neighbors) // This does not include the current rank. + { + for (Face const &face : exchanged.at(neighborRank).faces) { - faceBuckets[{ curRank, neighborRank }].insert( *it ); - curFaces.erase( it ); + auto it = curFaces.find(face); + if (it != curFaces.cend()) + { + faceBuckets[{curRank, neighborRank}].insert(*it); + curFaces.erase(it); + } } } + faceBuckets[{curRank}] = curFaces; + + return {nodeBuckets, edgeBuckets, faceBuckets}; } - faceBuckets[{ curRank }] = curFaces; - return { nodeBuckets, edgeBuckets, faceBuckets }; -} + /** + * @brief Estimate an upper bound of the serialized size of the size @p buckets. + * @param buckets + * @return Size in bytes. + * @details @c MPI_Scan requires a fixed buffer size. + * An a-priori estimation of the maximum size of the buffer leads to crazy amounts of memory + * because of the combinatorial pattern of the rank intersections. + * Therefore, we use an initial MPI communication to get a better estimation. + * @node We don't care about the @c nodes because their global numbering is already done. + */ + std::size_t buildMaxBufferSize(Buckets const &buckets) // TODO give the size buckets instead? + { + // this is the custom MPI reduction function we use in the sum across ranks + auto const f = [](auto const &bucket) -> std::size_t + { + std::size_t size = std::size(bucket); + for (auto const &[ranks, _] : bucket) + { + size += std::size(ranks) + 1 + 1; // One `+1` for the size of the ranks set, the other one for the offset + } + return size; + }; + return MpiWrapper::sum(f(buckets.edges) + f(buckets.faces)); // Add the ScannedOffsets::{edge, face}Restart + } -/** - * @brief Estimate an upper bound of the serialized size of the size @p buckets. - * @param buckets - * @return Size in bytes. - * @details @c MPI_Scan requires a fixed buffer size. - * An a-priori estimation of the maximum size of the buffer leads to crazy amounts of memory - * because of the combinatorial pattern of the rank intersections. - * Therefore, we use an initial MPI communication to get a better estimation. - * @node We don't care about the @c nodes because their global numbering is already done. - */ -std::size_t buildMaxBufferSize( Buckets const & buckets ) // TODO give the size buckets instead? -{ - auto const f = []( auto const & bucket ) -> std::size_t + struct BucketSizes { - std::size_t size = std::size( bucket ); - for( auto const & [ranks, _]: bucket ) - { - size += std::size( ranks ) + 1 + 1; // One `+1` for the size of the ranks set, the other one for the offset - } - return size; + using mapping = std::map, localIndex>; + mapping edges; + mapping faces; }; - return MpiWrapper::sum( f( buckets.edges ) + f( buckets.faces ) ); // Add the ScannedOffsets::{edge, face}Restart -} + void to_json(json &j, + const BucketSizes &v) + { + j = json{{"edges", v.edges}, + {"faces", v.faces}}; + } -struct BucketSizes -{ - using mapping = std::map< std::set< MpiRank >, localIndex >; - mapping edges; - mapping faces; -}; + void from_json(const json &j, + BucketSizes &v) + { + v.edges = j.at("edges").get(); + v.faces = j.at("faces").get(); + } -void to_json( json & j, - const BucketSizes & v ) -{ - j = json{ { "edges", v.edges }, - { "faces", v.faces } }; -} + std::vector serialize(BucketSizes const &sizes) + { + return json::to_cbor(json(sizes)); + } -void from_json( const json & j, - BucketSizes & v ) -{ - v.edges = j.at( "edges" ).get< BucketSizes::mapping >(); - v.faces = j.at( "faces" ).get< BucketSizes::mapping >(); -} + std::vector serialize(BucketOffsets const &offsets) + { + return json::to_cbor(json(offsets)); + } -std::vector< std::uint8_t > serialize( BucketSizes const & sizes ) -{ - return json::to_cbor( json( sizes ) ); -} + /** + * @brief Compute the sizes of the intersection buckets. + * @param buckets The intersection buckets. + * @return The buckets of sizes. + */ + BucketSizes getBucketSize(Buckets const &buckets) + { + BucketSizes output; -std::vector< std::uint8_t > serialize( BucketOffsets const & offsets ) -{ - return json::to_cbor( json( offsets ) ); -} + for (auto const &[ranks, edges] : buckets.edges) + { + output.edges.emplace_hint(output.edges.end(), ranks, std::size(edges)); + } -/** - * @brief Compute the sizes of the intersection buckets. - * @param buckets The intersection buckets. - * @return The buckets of sizes. - */ -BucketSizes getBucketSize( Buckets const & buckets ) -{ - BucketSizes output; + for (auto const &[ranks, faces] : buckets.faces) + { + output.faces.emplace_hint(output.faces.end(), ranks, std::size(faces)); + } - for( auto const & [ranks, edges]: buckets.edges ) - { - output.edges.emplace_hint( output.edges.end(), ranks, std::size( edges ) ); + return output; } - for( auto const & [ranks, faces]: buckets.faces ) + /** + * @brief + * @tparam V Container of std::uint8_t + * @param data + * @return + */ + template + BucketOffsets deserialize(V const &data) { - output.faces.emplace_hint( output.faces.end(), ranks, std::size( faces ) ); + return json::from_cbor(data, false).template get(); } - return output; -} + /** + * @brief Custom MPI reduction function. Merges the sizes of the bucket from current rank into numbering offsets. + * @param[in] in Contains the reduced result from the previous ranks. Needs to be unpacked into a @c BucketOffsets instance. + * @param[inout] inout As an @e input, contains a pointer to the @c BucketSizes instance from the current rank. + * As an @e output, will contained the (packed) instance of @c BucketOffsets after the @c reduction. + * The @e output instance will be used by both current and next ranks. + * @param[in] len Number of data. + * @param[in] dataType Type of data. Mean to be @c MPI_BYTE for the current case. + */ + void f(void *in, + void *inout, + int *len, + MPI_Datatype *dataType) + { + GEOS_ASSERT_EQ(*dataType, MPI_BYTE); -/** - * @brief - * @tparam V Container of std::uint8_t - * @param data - * @return - */ -template< class V > -BucketOffsets deserialize( V const & data ) -{ - return json::from_cbor( data, false ).template get< BucketOffsets >(); -} - -/** - * @brief Custom MPI reduction function. Merges the sizes of the bucket from current rank into numbering offsets. - * @param[in] in Contains the reduced result from the previous ranks. Needs to be unpacked into a @c BucketOffsets instance. - * @param[inout] inout As an @e input, contains a pointer to the @c BucketSizes instance from the current rank. - * As an @e output, will contained the (packed) instance of @c BucketOffsets after the @c reduction. - * The @e output instance will be used by both current and next ranks. - * @param[in] len Number of data. - * @param[in] dataType Type of data. Mean to be @c MPI_BYTE for the current case. - */ -void f( void * in, - void * inout, - int * len, - MPI_Datatype * dataType ) -{ - GEOS_ASSERT_EQ( *dataType, MPI_BYTE ); + MpiRank const curRank{MpiWrapper::commRank()}; + + // offsets provided by the previous rank(s) + BucketOffsets const offsets = deserialize(Span((std::uint8_t *)in, *len)); + + // Sizes provided by the current rank, under the form of a pointer to the data. + // No need to serialize, since we're on the same rank. + std::uintptr_t addr; + std::memcpy(&addr, inout, sizeof(std::uintptr_t)); + BucketSizes const *sizes = reinterpret_cast(addr); - MpiRank const curRank{ MpiWrapper::commRank() }; + BucketOffsets updatedOffsets; + updatedOffsets.edges = updateBucketOffsets(sizes->edges, offsets.edges, curRank); + updatedOffsets.faces = updateBucketOffsets(sizes->faces, offsets.faces, curRank); - // offsets provided by the previous rank(s) - BucketOffsets const offsets = deserialize( Span< std::uint8_t >( (std::uint8_t *) in, *len ) ); + // Serialize the updated offsets, so they get sent to the next rank. + std::vector const serialized = serialize(updatedOffsets); + std::memcpy(inout, serialized.data(), serialized.size()); + } - // Sizes provided by the current rank, under the form of a pointer to the data. - // No need to serialize, since we're on the same rank. - std::uintptr_t addr; - std::memcpy( &addr, inout, sizeof( std::uintptr_t ) ); - BucketSizes const * sizes = reinterpret_cast(addr); + std::tuple doTheNewGlobalNumbering(vtkSmartPointer mesh, + std::set const &neighbors) + { + // Now we exchange the data with our neighbors. + MpiRank const curRank{MpiWrapper::commRank()}; - BucketOffsets updatedOffsets; - updatedOffsets.edges = updateBucketOffsets< EdgeGlbIdx >( sizes->edges, offsets.edges, curRank ); - updatedOffsets.faces = updateBucketOffsets< FaceGlbIdx >( sizes->faces, offsets.faces, curRank ); + // ncs holds communicators to the neighboring ranks + std::vector ncs; + ncs.reserve(neighbors.size()); + for (MpiRank const &rank : neighbors) + { + ncs.emplace_back(rank.get()); + } - // Serialize the updated offsets, so they get sent to the next rank. - std::vector< std::uint8_t > const serialized = serialize( updatedOffsets ); - std::memcpy( inout, serialized.data(), serialized.size() ); -} + CommID const commId = CommunicationTools::getInstance().getCommID(); -std::tuple< Buckets, BucketOffsets > doTheNewGlobalNumbering( vtkSmartPointer< vtkDataSet > mesh, - std::set< MpiRank > const & neighbors ) -{ - // Now we exchange the data with our neighbors. - MpiRank const curRank{ MpiWrapper::commRank() }; + // On the current rank, we have a map from the neighbor MPI rank to a corresponding set of exchanged data (sets of nodes, edges, faces) + // buildExchangeData(mesh) just extracts the boundary (faces) of the current rank mesh + // exchange() actually does the communication, so that the output exchanged maps from nhbr rank to their boundary data + std::map exchanged = exchange(int(commId), ncs, buildExchangeData(mesh)); - std::vector< NeighborCommunicator > ncs; - ncs.reserve( neighbors.size() ); - for( MpiRank const & rank: neighbors ) - { - ncs.emplace_back( rank.get() ); - } + // Then I guess we just put the full mesh data for the current rank into the corresponding slot of exchanged, i think this is just convenient + exchanged[MpiRank{MpiWrapper::commRank()}] = buildFullData(mesh); - CommID const commId = CommunicationTools::getInstance().getCommID(); - std::map< MpiRank, Exchange > exchanged = exchange( int( commId ), ncs, buildExchangeData( mesh ) ); - exchanged[MpiRank{ MpiWrapper::commRank() }] = buildFullData( mesh ); + //std::cout << "exchanged on rank " << curRank << " -> " << json( exchanged[MpiRank{ MpiWrapper::commRank() }] ) << std::endl; -// std::cout << "exchanged on rank " << curRank << " -> " << json( exchanged[MpiRank{ MpiWrapper::commRank() }] ) << std::endl; + // Buckets - for each of (nodes, edges, faces), map from set of MPI ranks to the set of (nodes, edges, faces) shared by those ranks + Buckets const buckets = buildIntersectionBuckets(exchanged, curRank, neighbors); - Buckets const buckets = buildIntersectionBuckets( exchanged, curRank, neighbors ); + // Next step is communication. + // We will be passing around info about of the each ranks' buckets, and to do this we need to know the size of the data to be sent + // Technically this changes, as each rank has a different number of buckets + // For MPI scan, we need a fixed send/recv size, so we estimate with a large buffer size + // We dont want to do this too naively, as just taking the total possible cobinations of rank intersections leads to a massive size + // Instead, we do an MPI operation here to estimate an appropriate size + // The idea is to just sum the size of buckets across the ranks + std::size_t const maxBufferSize = 100 * buildMaxBufferSize(buckets); - std::size_t const maxBufferSize = 100 * buildMaxBufferSize( buckets ); + std::vector sendBuffer(maxBufferSize); + std::vector recvBuffer(maxBufferSize); - std::vector< std::uint8_t > sendBuffer( maxBufferSize ); - std::vector< std::uint8_t > recvBuffer( maxBufferSize ); + // The idea is that each rank needs to know where to start indexing the (nodes, edges, faces) it owns + // To do this, we need BucketOffsets, which are like buckets, but instead associate a global index to each set of MPI ranks, which marks the index of the first (node, edge, face) in that bucket + // To get this, we need to know the size of each bucket on the current rank, stored in BucketSizes + BucketSizes const sizes = getBucketSize(buckets); + BucketOffsets offsets; + if (curRank == 0_mpi) + { + // The `MPI_Scan` process will not call the reduction operator for rank 0. + // So we need to reduce for ourselves. + // updateBucketOffsets just takes the bucket sizes on the current rank, + // the previously computed offsets from other MPI ranks (none in this case), + // and then does the additions so that the global indices for the current buckets are known in offsets + offsets.edges = updateBucketOffsets(sizes.edges, {{{ + 0_mpi, + }, + 0_egi}}, + curRank); + offsets.faces = updateBucketOffsets(sizes.faces, {{{ + 0_mpi, + }, + 0_fgi}}, + curRank); + // Still we need to send this reduction to the following rank, by copying to it to the send buffer. + std::vector const bytes = serialize(offsets); + std::memcpy(sendBuffer.data(), bytes.data(), bytes.size()); + } + else + { + // For the other ranks, the reduction operator will be called during the `Mpi_Scan` process. + // So unlike for rank 0, we do not have to do it ourselves. + // In order to provide the `sizes` to the reduction operator, + // since `sizes` will only be used on the current rank, + // we'll provide the information as a pointer to the instance. + // The reduction operator will then compute the new offsets and send them to the following rank. + std::uintptr_t const addr = reinterpret_cast(&sizes); + std::memcpy(sendBuffer.data(), &addr, sizeof(std::uintptr_t)); + } - BucketSizes const sizes = getBucketSize( buckets ); - BucketOffsets offsets; - if( curRank == 0_mpi ) - { - // The `MPI_Scan` process will not call the reduction operator for rank 0. - // So we need to reduce ourselves for ourselves. - offsets.edges = updateBucketOffsets< EdgeGlbIdx >( sizes.edges, { { { 0_mpi, }, 0_egi } }, curRank ); - offsets.faces = updateBucketOffsets< FaceGlbIdx >( sizes.faces, { { { 0_mpi, }, 0_fgi } }, curRank ); - // Still we need to send this reduction to the following rank, by copying to it to the send buffer. - std::vector< std::uint8_t > const bytes = serialize( offsets ); - std::memcpy( sendBuffer.data(), bytes.data(), bytes.size() ); - } - else - { - // For the other ranks, the reduction operator will be called during the `Mpi_Scan` process. - // So unlike for rank 0, we do not have to do it ourselves. - // In order to provide the `sizes` to the reduction operator, - // since `sizes` will only be used on the current rank, - // we'll provide the information as a pointer to the instance. - // The reduction operator will then compute the new offsets and send them to the following rank. - std::uintptr_t const addr = reinterpret_cast(&sizes); - std::memcpy( sendBuffer.data(), &addr, sizeof( std::uintptr_t ) ); - } + // f is our mpi reduction function, all it essentially does is call the updateBucketOffsets as above, + // but it also handles the packing/unpacking of transferred data + // Note that, right now, packing/unpacking is done via json, this should be changed to GEOS packing routines + MPI_Op op; + MPI_Op_create(f, false, &op); - MPI_Op op; - MPI_Op_create( f, false, &op ); + MPI_Scan(sendBuffer.data(), recvBuffer.data(), maxBufferSize, MPI_BYTE, op, MPI_COMM_WORLD); - MPI_Scan( sendBuffer.data(), recvBuffer.data(), maxBufferSize, MPI_BYTE, op, MPI_COMM_WORLD ); + if (curRank != 0_mpi) + { + offsets = deserialize(recvBuffer); + } - if( curRank != 0_mpi ) - { - offsets = deserialize( recvBuffer ); + // With the offsets for each bucket, we are done with global numbering, + // as each (edge, face) is numbered by starting from the offset for that bucket and counting up + return {std::move(buckets), std::move(offsets)}; } - return { std::move( buckets ), std::move( offsets ) }; -} - } // end of namespace \ No newline at end of file