From f13ca1925e438282e405a41cec5b209624898b89 Mon Sep 17 00:00:00 2001 From: Michael Howard Date: Wed, 25 Sep 2024 21:49:30 -0500 Subject: [PATCH 01/10] Raise exceptions in serialized MPI methods --- hoomd/HOOMDMPI.h | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/hoomd/HOOMDMPI.h b/hoomd/HOOMDMPI.h index f292de06c3..5266ba03f1 100644 --- a/hoomd/HOOMDMPI.h +++ b/hoomd/HOOMDMPI.h @@ -130,7 +130,7 @@ template void bcast(T& val, unsigned int root, const MPI_Comm mpi_co MPI_Comm_rank(mpi_comm, &rank); char* buf = NULL; - unsigned int recv_count; + int recv_count; if (rank == (int)root) { std::stringstream s(std::ios_base::out | std::ios_base::binary); @@ -144,7 +144,11 @@ template void bcast(T& val, unsigned int root, const MPI_Comm mpi_co // copy string to send buffer std::string str = s.str(); - recv_count = (unsigned int)str.size(); + if (str.length() > std::numeric_limits::max()) + { + throw std::runtime_error("Serialized bytes overflow MPI limit"); + } + recv_count = static_cast(str.size()); buf = new char[recv_count]; str.copy(buf, recv_count); } @@ -182,7 +186,7 @@ void scatter_v(const std::vector& in_values, assert(in_values.size() == (unsigned int)size); - unsigned int recv_count; + int recv_count; int* send_counts = NULL; int* displs = NULL; @@ -194,7 +198,7 @@ void scatter_v(const std::vector& in_values, // construct a vector of serialized objects typename std::vector::const_iterator it; std::vector str; - unsigned int len = 0; + size_t len = 0; for (it = in_values.begin(); it != in_values.end(); ++it) { unsigned int idx = (unsigned int)(it - in_values.begin()); @@ -207,7 +211,11 @@ void scatter_v(const std::vector& in_values, str.push_back(s.str()); displs[idx] = (idx > 0) ? displs[idx - 1] + send_counts[idx - 1] : 0; - send_counts[idx] = (unsigned int)(str[idx].length()); + if (str[idx].length() > std::numeric_limits::max()) + { + throw std::runtime_error("Serialized bytes overflow MPI limit"); + } + send_counts[idx] = static_cast(str[idx].length()); len += send_counts[idx]; } @@ -262,7 +270,11 @@ void gather_v(const T& in_value, // copy into send buffer std::string str = s.str(); - unsigned int send_count = (unsigned int)str.length(); + if (str.length() > std::numeric_limits::max()) + { + throw std::runtime_error("Serialized bytes overflow MPI limit"); + } + int send_count = static_cast(str.length()); int* recv_counts = NULL; int* displs = NULL; @@ -279,7 +291,7 @@ void gather_v(const T& in_value, char* rbuf = NULL; if (rank == (int)root) { - unsigned int len = 0; + size_t len = 0; for (unsigned int i = 0; i < (unsigned int)size; i++) { displs[i] = (i > 0) ? displs[i - 1] + recv_counts[i - 1] : 0; @@ -335,7 +347,11 @@ void all_gather_v(const T& in_value, std::vector& out_values, const MPI_Comm // copy into send buffer std::string str = s.str(); - unsigned int send_count = (unsigned int)str.length(); + if (str.length() > std::numeric_limits::max()) + { + throw std::runtime_error("Serialized bytes overflow MPI limit"); + } + int send_count = static_cast(str.length()); // allocate memory for buffer lengths out_values.resize(size); @@ -346,7 +362,7 @@ void all_gather_v(const T& in_value, std::vector& out_values, const MPI_Comm MPI_Allgather(&send_count, 1, MPI_INT, recv_counts, 1, MPI_INT, mpi_comm); // allocate receiver buffer - unsigned int len = 0; + size_t len = 0; for (unsigned int i = 0; i < (unsigned int)size; i++) { displs[i] = (i > 0) ? displs[i - 1] + recv_counts[i - 1] : 0; @@ -400,7 +416,11 @@ template void send(const T& val, const unsigned int dest, const MPI_ // copy string to send buffer std::string str = s.str(); - recv_count = (unsigned int)str.size(); + if (str.length() > std::numeric_limits::max()) + { + throw std::runtime_error("Serialized bytes overflow MPI limit"); + } + recv_count = static_cast(str.size()); buf = new char[recv_count]; str.copy(buf, recv_count); From 678120c3fe9ebc67cde9d1841d102ff635e696f0 Mon Sep 17 00:00:00 2001 From: Michael Howard Date: Wed, 25 Sep 2024 21:50:02 -0500 Subject: [PATCH 02/10] Create custom MPI datatypes for HOOMD data --- hoomd/MPIConfiguration.cc | 77 +++++++++++++++++++++++++++++++++++++++ hoomd/MPIConfiguration.h | 19 ++++++++++ 2 files changed, 96 insertions(+) diff --git a/hoomd/MPIConfiguration.cc b/hoomd/MPIConfiguration.cc index 075315dd8d..424777e589 100644 --- a/hoomd/MPIConfiguration.cc +++ b/hoomd/MPIConfiguration.cc @@ -3,6 +3,8 @@ #include "MPIConfiguration.h" +#include "VectorMath.h" + #ifdef ENABLE_MPI #include "HOOMDMPI.h" #endif @@ -36,6 +38,59 @@ MPIConfiguration::MPIConfiguration( int rank; MPI_Comm_rank(m_mpi_comm, &rank); m_rank = rank; + + // create scalar2 data type for MPI + { + int blocklengths[] = {1, 1}; + MPI_Datatype types[] = {MPI_HOOMD_SCALAR, MPI_HOOMD_SCALAR}; + MPI_Aint offsets[] = {offsetof(Scalar2, x), offsetof(Scalar2, y)}; + + MPI_Datatype tmp; + MPI_Type_create_struct(2, blocklengths, offsets, types, &tmp); + MPI_Type_create_resized(tmp, 0, sizeof(Scalar2), &m_mpi_scalar2); + MPI_Type_commit(&m_mpi_scalar2); + } + + // create scalar3 data type for MPI + { + int blocklengths[] = {1, 1, 1}; + MPI_Datatype types[] = {MPI_HOOMD_SCALAR, MPI_HOOMD_SCALAR, MPI_HOOMD_SCALAR}; + MPI_Aint offsets[] = {offsetof(Scalar3, x), offsetof(Scalar3, y), offsetof(Scalar3, z)}; + + MPI_Datatype tmp; + MPI_Type_create_struct(3, blocklengths, offsets, types, &tmp); + MPI_Type_create_resized(tmp, 0, sizeof(Scalar3), &m_mpi_scalar3); + MPI_Type_commit(&m_mpi_scalar3); + } + + // create vec3 data type for MPI + { + int blocklengths[] = {1, 1, 1}; + MPI_Datatype types[] = {MPI_HOOMD_SCALAR, MPI_HOOMD_SCALAR, MPI_HOOMD_SCALAR}; + MPI_Aint offsets[] + = {offsetof(vec3, x), offsetof(vec3, y), offsetof(vec3, z)}; + + MPI_Datatype tmp; + MPI_Type_create_struct(3, blocklengths, offsets, types, &tmp); + MPI_Type_create_resized(tmp, 0, sizeof(vec3), &m_mpi_vec3_scalar); + MPI_Type_commit(&m_mpi_vec3_scalar); + } + + // create scalar4 data type for MPI + { + int blocklengths[] = {1, 1, 1, 1}; + MPI_Datatype types[] + = {MPI_HOOMD_SCALAR, MPI_HOOMD_SCALAR, MPI_HOOMD_SCALAR, MPI_HOOMD_SCALAR}; + MPI_Aint offsets[] = {offsetof(Scalar4, x), + offsetof(Scalar4, y), + offsetof(Scalar4, z), + offsetof(Scalar4, w)}; + + MPI_Datatype tmp; + MPI_Type_create_struct(4, blocklengths, offsets, types, &tmp); + MPI_Type_create_resized(tmp, 0, sizeof(Scalar4), &m_mpi_scalar4); + MPI_Type_commit(&m_mpi_scalar4); + } #endif } @@ -82,6 +137,28 @@ unsigned int MPIConfiguration::getNRanks() const #endif } +#ifdef ENABLE_MPI +MPI_Datatype MPIConfiguration::getScalar2Datatype() const + { + return m_mpi_scalar2; + } + +MPI_Datatype MPIConfiguration::getScalar3Datatype() const + { + return m_mpi_scalar3; + } + +MPI_Datatype MPIConfiguration::getVec3ScalarDatatype() const + { + return m_mpi_vec3_scalar; + } + +MPI_Datatype MPIConfiguration::getScalar4Datatype() const + { + return m_mpi_scalar4; + } +#endif // ENABLE_MPI + namespace detail { void export_MPIConfiguration(pybind11::module& m) diff --git a/hoomd/MPIConfiguration.h b/hoomd/MPIConfiguration.h index ac050fa44e..e1a6965926 100644 --- a/hoomd/MPIConfiguration.h +++ b/hoomd/MPIConfiguration.h @@ -139,10 +139,29 @@ class PYBIND11_EXPORT MPIConfiguration return walltime; } +#ifdef ENABLE_MPI + //! Get Scalar2 datatype + MPI_Datatype getScalar2Datatype() const; + + //! Get Scalar3 datatype + MPI_Datatype getScalar3Datatype() const; + + //! Get vec3 datatype + MPI_Datatype getVec3ScalarDatatype() const; + + //! Get Scalar4 datatype + MPI_Datatype getScalar4Datatype() const; +#endif // ENABLE_MPI + protected: #ifdef ENABLE_MPI MPI_Comm m_mpi_comm; //!< The MPI communicator MPI_Comm m_hoomd_world; //!< The HOOMD world communicator + + MPI_Datatype m_mpi_scalar2; //!< HOOMD Scalar2 MPI datatype + MPI_Datatype m_mpi_scalar3; //!< HOOMD Scalar3 MPI datatype + MPI_Datatype m_mpi_vec3_scalar; //!< HOOMD vec3 MPI datatype + MPI_Datatype m_mpi_scalar4; //!< HOOMD Scalar4 MPI datatype #endif unsigned int m_rank; //!< Rank of this processor (0 if running in single-processor mode) unsigned int m_n_rank; //!< Ranks per partition From 883adca1c441554ea8cc619ce8f326c7be4da978 Mon Sep 17 00:00:00 2001 From: Michael Howard Date: Wed, 25 Sep 2024 21:53:20 -0500 Subject: [PATCH 03/10] Scatter snapshots using custom datatype --- hoomd/mpcd/Communicator.cc | 27 +---- hoomd/mpcd/Communicator.h | 1 - hoomd/mpcd/CommunicatorGPU.cc | 5 +- hoomd/mpcd/ParticleData.cc | 194 ++++++++++++++++++++-------------- hoomd/mpcd/ParticleData.h | 4 + 5 files changed, 127 insertions(+), 104 deletions(-) diff --git a/hoomd/mpcd/Communicator.cc b/hoomd/mpcd/Communicator.cc index 1ccf25f28e..91b890df6d 100644 --- a/hoomd/mpcd/Communicator.cc +++ b/hoomd/mpcd/Communicator.cc @@ -49,23 +49,6 @@ mpcd::Communicator::Communicator(std::shared_ptr sysdef) GPUArray adj_mask(neigh_max, m_exec_conf); m_adj_mask.swap(adj_mask); - // create new data type for the pdata_element - const int nitems = 4; - int blocklengths[nitems] = {4, 4, 1, 1}; - MPI_Datatype types[nitems] = {MPI_HOOMD_SCALAR, MPI_HOOMD_SCALAR, MPI_UNSIGNED, MPI_UNSIGNED}; - MPI_Aint offsets[nitems]; - offsets[0] = offsetof(mpcd::detail::pdata_element, pos); - offsets[1] = offsetof(mpcd::detail::pdata_element, vel); - offsets[2] = offsetof(mpcd::detail::pdata_element, tag); - offsets[3] = offsetof(mpcd::detail::pdata_element, comm_flag); - // this needs to be made via the resize method to get its upper bound correctly - MPI_Datatype tmp; - MPI_Type_create_struct(nitems, blocklengths, offsets, types, &tmp); - MPI_Type_commit(&tmp); - MPI_Type_create_resized(tmp, 0, sizeof(mpcd::detail::pdata_element), &m_pdata_element); - MPI_Type_commit(&m_pdata_element); - MPI_Type_free(&tmp); - initializeNeighborArrays(); } @@ -73,7 +56,6 @@ mpcd::Communicator::~Communicator() { m_exec_conf->msg->notice(5) << "Destroying MPCD Communicator" << std::endl; detachCallbacks(); - MPI_Type_free(&m_pdata_element); } void mpcd::Communicator::initializeNeighborArrays() @@ -349,11 +331,12 @@ void mpcd::Communicator::migrateParticles(uint64_t timestep) access_mode::overwrite); m_reqs.resize(4); int nreq = 0; + const MPI_Datatype mpi_pdata_element = m_mpcd_pdata->getElementMPIDatatype(); if (n_send_right != 0) { MPI_Isend(h_sendbuf.data + n_keep, n_send_right, - m_pdata_element, + mpi_pdata_element, right_neigh, 1, m_mpi_comm, @@ -363,7 +346,7 @@ void mpcd::Communicator::migrateParticles(uint64_t timestep) { MPI_Isend(h_sendbuf.data + n_keep + n_send_right, n_send_left, - m_pdata_element, + mpi_pdata_element, left_neigh, 1, m_mpi_comm, @@ -373,7 +356,7 @@ void mpcd::Communicator::migrateParticles(uint64_t timestep) { MPI_Irecv(h_recvbuf.data + n_recv, n_recv_right, - m_pdata_element, + mpi_pdata_element, right_neigh, 1, m_mpi_comm, @@ -383,7 +366,7 @@ void mpcd::Communicator::migrateParticles(uint64_t timestep) { MPI_Irecv(h_recvbuf.data + n_recv + n_recv_right, n_recv_left, - m_pdata_element, + mpi_pdata_element, left_neigh, 1, m_mpi_comm, diff --git a/hoomd/mpcd/Communicator.h b/hoomd/mpcd/Communicator.h index 0728074589..b0e78fd0c9 100644 --- a/hoomd/mpcd/Communicator.h +++ b/hoomd/mpcd/Communicator.h @@ -198,7 +198,6 @@ class PYBIND11_EXPORT Communicator : public Autotuned //! Helper function to initialize adjacency arrays void initializeNeighborArrays(); - MPI_Datatype m_pdata_element; //!< MPI struct for pdata_element GPUVector m_sendbuf; //!< Buffer for particles that are sent GPUVector m_recvbuf; //!< Buffer for particles that are received std::vector m_reqs; //!< MPI requests diff --git a/hoomd/mpcd/CommunicatorGPU.cc b/hoomd/mpcd/CommunicatorGPU.cc index 66c7972c67..15e6173754 100644 --- a/hoomd/mpcd/CommunicatorGPU.cc +++ b/hoomd/mpcd/CommunicatorGPU.cc @@ -302,6 +302,7 @@ void mpcd::CommunicatorGPU::migrateParticles(uint64_t timestep) // loop over neighbors unsigned int nreq = 0; m_reqs.resize(2 * m_n_unique_neigh); + const MPI_Datatype mpi_pdata_element = m_mpcd_pdata->getElementMPIDatatype(); unsigned int sendidx = 0; for (unsigned int ineigh = 0; ineigh < m_n_unique_neigh; ++ineigh) { @@ -313,7 +314,7 @@ void mpcd::CommunicatorGPU::migrateParticles(uint64_t timestep) { MPI_Isend(h_sendbuf.data + sendidx, m_n_send_ptls[ineigh], - m_pdata_element, + mpi_pdata_element, neighbor, 1, m_mpi_comm, @@ -327,7 +328,7 @@ void mpcd::CommunicatorGPU::migrateParticles(uint64_t timestep) { MPI_Irecv(h_recvbuf.data + m_offsets[ineigh], m_n_recv_ptls[ineigh], - m_pdata_element, + mpi_pdata_element, neighbor, 1, m_mpi_comm, diff --git a/hoomd/mpcd/ParticleData.cc b/hoomd/mpcd/ParticleData.cc index 8b9bcc8347..109852096d 100644 --- a/hoomd/mpcd/ParticleData.cc +++ b/hoomd/mpcd/ParticleData.cc @@ -14,6 +14,10 @@ #include "hoomd/HOOMDMPI.h" #endif // ENABLE_MPI +#include +#include +#include + #include #include @@ -55,7 +59,7 @@ mpcd::ParticleData::ParticleData(unsigned int N, setupMPI(decomposition); if (m_exec_conf->getNRanks() > 1) { - bcast(my_seed, 0, m_exec_conf->getMPICommunicator()); + MPI_Bcast(&my_seed, 1, MPI_UNSIGNED, 0, m_exec_conf->getMPICommunicator()); my_seed += m_exec_conf->getRank(); // each rank must get a different seed value for C++11 PRNG } @@ -119,38 +123,39 @@ void mpcd::ParticleData::initializeFromSnapshot(const mpcd::ParticleDataSnapshot } // global number of particles - unsigned int nglobal(0); + unsigned int N_global(0); #ifdef ENABLE_MPI if (m_decomposition) { - // Define per-processor particle data - std::vector> pos_proc; // Position array of every processor - std::vector> vel_proc; // Velocities array of every processor - std::vector> type_proc; // Particle types array of every processor - std::vector> tag_proc; // Global tags array of every processor - std::vector N_proc; // Number of particles on every processor - - // resize to number of ranks in communicator const MPI_Comm mpi_comm = m_exec_conf->getMPICommunicator(); unsigned int n_ranks = m_exec_conf->getNRanks(); unsigned int rank = m_exec_conf->getRank(); - pos_proc.resize(n_ranks); - vel_proc.resize(n_ranks); - type_proc.resize(n_ranks); - tag_proc.resize(n_ranks); - N_proc.resize(n_ranks, 0); - - // scatter information to all processors from rank 0 (root) const unsigned int root = 0; + + // assign each particle to a rank + unsigned int num_types = 0; + std::vector num_per_rank; // number per rank + std::vector rank_displacements; // displacement of particles per rank + std::vector particle_rank; // rank each particle belongs to + std::vector particles; if (rank == root) { const Index3D& di = m_decomposition->getDomainIndexer(); - unsigned int n_ranks = m_exec_conf->getNRanks(); ArrayHandle h_cart_ranks(m_decomposition->getCartRanks(), access_location::host, access_mode::read); + // global particle number is snapshot size, also allocate temporary data for sending + N_global = snapshot.size; + particle_rank.resize(N_global); + particles.resize(N_global); + + // temporary data for counts per rank + num_per_rank.resize(n_ranks); + rank_displacements.resize(n_ranks); + std::fill(num_per_rank.begin(), num_per_rank.end(), 0); + // loop over particles in snapshot, place them into domains for (auto it = snapshot.position.begin(); it != snapshot.position.end(); ++it) { @@ -211,54 +216,70 @@ void mpcd::ParticleData::initializeFromSnapshot(const mpcd::ParticleDataSnapshot throw std::runtime_error("Error initializing from snapshot."); } + // pack particle into pdata element + mpcd::detail::pdata_element particle; + particle.pos + = make_scalar4(pos.x, pos.y, pos.z, __int_as_scalar(snapshot.type[snap_idx])); + const auto vel = snapshot.velocity[snap_idx]; + particle.vel + = make_scalar4(vel.x, vel.y, vel.z, __int_as_scalar(mpcd::detail::NO_CELL)); + particle.tag = snap_idx; + particle.comm_flag = 0; + // fill up per-processor data structures - pos_proc[rank].push_back(pos); - vel_proc[rank].push_back(vec_to_scalar3(snapshot.velocity[snap_idx])); - type_proc[rank].push_back(snapshot.type[snap_idx]); - tag_proc[rank].push_back(nglobal++); - ++N_proc[rank]; + particle_rank[snap_idx] = rank; + particles[snap_idx] = particle; + ++num_per_rank[rank]; } // mass is set equal for all particles m_mass = snapshot.mass; - } - // get type mapping - m_type_mapping = snapshot.type_mapping; - - if (rank != root) - { - m_type_mapping.clear(); + // assign type map from snapshot + m_type_mapping = snapshot.type_mapping; + num_types = static_cast(m_type_mapping.size()); + + // sort particles by rank + thrust::exclusive_scan(thrust::host, + num_per_rank.begin(), + num_per_rank.end(), + rank_displacements.begin(), + 0); + thrust::sort_by_key(thrust::host, + particle_rank.begin(), + particle_rank.end(), + particles.begin()); } + // broadcast global number of particles + MPI_Bcast(&N_global, 1, MPI_UNSIGNED, root, mpi_comm); + // broadcast the particle mass - bcast(m_mass, root, mpi_comm); + MPI_Bcast(&m_mass, 1, MPI_HOOMD_SCALAR, root, mpi_comm); // broadcast type mapping + MPI_Bcast(&num_types, 1, MPI_UNSIGNED, root, mpi_comm); + if (rank != root) + { + m_type_mapping.resize(num_types); + } bcast(m_type_mapping, root, mpi_comm); - // broadcast global number of particles - bcast(nglobal, root, mpi_comm); - - // Local particle data - std::vector pos; - std::vector vel; - std::vector type; - std::vector tag; + // scatter the number of particles on each rank and allocate + MPI_Scatter(num_per_rank.data(), 1, MPI_UNSIGNED, &m_N, 1, MPI_UNSIGNED, root, mpi_comm); + allocate(std::max(1u, m_N)); + std::vector recv_particles(m_N); // distribute particle data to processors - scatter_v(pos_proc, pos, root, mpi_comm); - scatter_v(vel_proc, vel, root, mpi_comm); - scatter_v(type_proc, type, root, mpi_comm); - scatter_v(tag_proc, tag, root, mpi_comm); - scatter_v(N_proc, m_N, root, mpi_comm); - - // we have to allocate even if the number of particles on a processor - // is zero, so that the arrays can be resized later - if (m_N == 0) - allocate(1); - else - allocate(m_N); + MPI_Scatterv(particles.data(), + num_per_rank.data(), + rank_displacements.data(), + m_mpi_pdata_element, + recv_particles.data(), + m_N, + m_mpi_pdata_element, + root, + mpi_comm); // Fill-up particle data arrays ArrayHandle h_pos(m_pos, access_location::host, access_mode::overwrite); @@ -269,52 +290,46 @@ void mpcd::ParticleData::initializeFromSnapshot(const mpcd::ParticleDataSnapshot access_mode::overwrite); for (unsigned int idx = 0; idx < m_N; idx++) { - h_pos.data[idx] - = make_scalar4(pos[idx].x, pos[idx].y, pos[idx].z, __int_as_scalar(type[idx])); - h_vel.data[idx] = make_scalar4(vel[idx].x, - vel[idx].y, - vel[idx].z, - __int_as_scalar(mpcd::detail::NO_CELL)); - h_tag.data[idx] = tag[idx]; + const mpcd::detail::pdata_element particle = recv_particles[idx]; + h_pos.data[idx] = particle.pos; + h_vel.data[idx] = particle.vel; + h_tag.data[idx] = particle.tag; h_comm_flag.data[idx] = 0; // initialize with zero by default } } else #endif // ENABLE_MPI { - allocate(snapshot.size); + // number of local particles is the global number + N_global = snapshot.size; + allocate(N_global); + m_N = N_global; ArrayHandle h_pos(m_pos, access_location::host, access_mode::overwrite); ArrayHandle h_vel(m_vel, access_location::host, access_mode::overwrite); ArrayHandle h_tag(m_tag, access_location::host, access_mode::overwrite); - for (unsigned int snap_idx = 0; snap_idx < snapshot.size; ++snap_idx) + for (unsigned int idx = 0; idx < m_N; ++idx) { - h_pos.data[nglobal] = make_scalar4(snapshot.position[snap_idx].x, - snapshot.position[snap_idx].y, - snapshot.position[snap_idx].z, - __int_as_scalar(snapshot.type[snap_idx])); - h_vel.data[nglobal] = make_scalar4(snapshot.velocity[snap_idx].x, - snapshot.velocity[snap_idx].y, - snapshot.velocity[snap_idx].z, - __int_as_scalar(mpcd::detail::NO_CELL)); - h_tag.data[nglobal] = nglobal; - nglobal++; + const auto pos = snapshot.position[idx]; + h_pos.data[idx] + = make_scalar4(pos.x, pos.y, pos.z, __int_as_scalar(snapshot.type[idx])); + + const auto vel = snapshot.velocity[idx]; + h_vel.data[idx] + = make_scalar4(vel.x, vel.y, vel.z, __int_as_scalar(mpcd::detail::NO_CELL)); + + h_tag.data[idx] = idx; } // mass is equal for all particles m_mass = snapshot.mass; - // number of local particles is the global number - m_N = nglobal; - // initialize type mapping m_type_mapping = snapshot.type_mapping; } - setNGlobal(nglobal); - - // TODO: any particle data signaling to subscribers + setNGlobal(N_global); } /*! @@ -559,7 +574,7 @@ bool mpcd::ParticleData::checkSnapshot(const mpcd::ParticleDataSnapshot& snapsho #ifdef ENABLE_MPI if (m_decomposition) { - bcast(valid_snapshot, 0, m_exec_conf->getMPICommunicator()); + MPI_Bcast(&valid_snapshot, 1, MPI_CXX_BOOL, 0, m_exec_conf->getMPICommunicator()); } #endif @@ -606,7 +621,7 @@ bool mpcd::ParticleData::checkInBox(const mpcd::ParticleDataSnapshot& snapshot, #ifdef ENABLE_MPI if (m_decomposition) { - bcast(in_box, 0, m_exec_conf->getMPICommunicator()); + MPI_Bcast(&in_box, 1, MPI_CXX_BOOL, 0, m_exec_conf->getMPICommunicator()); } #endif return in_box; @@ -760,7 +775,7 @@ void mpcd::ParticleData::setMass(Scalar mass) // in mpi, the mass must be synced between all ranks if (m_decomposition) { - bcast(m_mass, 0, m_exec_conf->getMPICommunicator()); + MPI_Bcast(&m_mass, 1, MPI_HOOMD_SCALAR, 0, m_exec_conf->getMPICommunicator()); } #endif // ENABLE_MPI } @@ -910,6 +925,12 @@ unsigned int mpcd::ParticleData::addVirtualParticles(unsigned int N) } #ifdef ENABLE_MPI + +MPI_Datatype mpcd::ParticleData::getElementMPIDatatype() const + { + return m_mpi_pdata_element; + } + /*! * \param out Buffer into which particle data is packed * \param mask Mask for \a m_comm_flags to determine if communication is necessary @@ -1268,6 +1289,21 @@ void mpcd::ParticleData::setupMPI(std::shared_ptr decomposi m_autotuners.insert(m_autotuners.end(), {m_mark_tuner, m_remove_tuner, m_add_tuner}); } #endif // ENABLE_HIP + + // create new data type for the pdata_element + { + const MPI_Datatype mpi_scalar4 = m_exec_conf->getMPIConfig()->getScalar4Datatype(); + int blocklengths[] = {1, 1, 1, 1}; + MPI_Datatype types[] = {mpi_scalar4, mpi_scalar4, MPI_UNSIGNED, MPI_UNSIGNED}; + MPI_Aint offsets[] = {offsetof(mpcd::detail::pdata_element, pos), + offsetof(mpcd::detail::pdata_element, vel), + offsetof(mpcd::detail::pdata_element, tag), + offsetof(mpcd::detail::pdata_element, comm_flag)}; + MPI_Datatype tmp; + MPI_Type_create_struct(4, blocklengths, offsets, types, &tmp); + MPI_Type_create_resized(tmp, 0, sizeof(mpcd::detail::pdata_element), &m_mpi_pdata_element); + MPI_Type_commit(&m_mpi_pdata_element); + } } #endif // ENABLE_MPI diff --git a/hoomd/mpcd/ParticleData.h b/hoomd/mpcd/ParticleData.h index 4b2f56b945..854f0f80f5 100644 --- a/hoomd/mpcd/ParticleData.h +++ b/hoomd/mpcd/ParticleData.h @@ -360,6 +360,9 @@ class PYBIND11_EXPORT ParticleData : public Autotuned //! \name communication methods //@{ + //! Get MPI datatype for pdata_element + MPI_Datatype getElementMPIDatatype() const; + //! Pack particle data into a buffer void removeParticles(GPUVector& out, unsigned int mask, @@ -419,6 +422,7 @@ class PYBIND11_EXPORT ParticleData : public Autotuned std::vector m_type_mapping; //!< Type name mapping #ifdef ENABLE_MPI GPUArray m_comm_flags; //!< MPCD particle communication flags + MPI_Datatype m_mpi_pdata_element; //!< MPI datatype for pdata_element #endif // ENABLE_MPI GPUArray m_pos_alt; //!< Alternate position array From cfc1058c4f683a0088d8a2ee64ff27c8303efa9f Mon Sep 17 00:00:00 2001 From: Michael Howard Date: Wed, 25 Sep 2024 21:55:39 -0500 Subject: [PATCH 04/10] Remove other uses of serialized bcast from MPCD code --- hoomd/SnapshotSystemData.cc | 10 ++++++---- hoomd/mpcd/ParticleDataSnapshot.cc | 29 +++++++++++++++++++++++------ hoomd/mpcd/ParticleDataSnapshot.h | 3 ++- hoomd/mpcd/VirtualParticleFiller.cc | 2 +- 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/hoomd/SnapshotSystemData.cc b/hoomd/SnapshotSystemData.cc index 9192d16cc8..ce40ea5e96 100644 --- a/hoomd/SnapshotSystemData.cc +++ b/hoomd/SnapshotSystemData.cc @@ -94,7 +94,8 @@ void SnapshotSystemData::broadcast(unsigned int root, { #ifdef ENABLE_MPI auto communicator = exec_conf->getMPICommunicator(); - broadcast_box(exec_conf->getMPIConfig()); + auto mpi_config = exec_conf->getMPIConfig(); + broadcast_box(mpi_config); if (exec_conf->getNRanks() > 1) { particle_data.bcast(root, communicator); @@ -105,7 +106,7 @@ void SnapshotSystemData::broadcast(unsigned int root, constraint_data.bcast(root, communicator); pair_data.bcast(root, communicator); #ifdef BUILD_MPCD - mpcd_data.bcast(root, communicator); + mpcd_data.bcast(root, communicator, mpi_config); #endif } #endif @@ -117,9 +118,10 @@ void SnapshotSystemData::broadcast_all(unsigned int root, { #ifdef ENABLE_MPI MPI_Comm hoomd_world = exec_conf->getHOOMDWorldMPICommunicator(); + auto mpi_config = exec_conf->getMPIConfig(); int n_ranks; MPI_Comm_size(hoomd_world, &n_ranks); - broadcast_box(exec_conf->getMPIConfig()); + broadcast_box(mpi_config); if (n_ranks > 0) { particle_data.bcast(root, hoomd_world); @@ -130,7 +132,7 @@ void SnapshotSystemData::broadcast_all(unsigned int root, constraint_data.bcast(root, hoomd_world); pair_data.bcast(root, hoomd_world); #ifdef BUILD_MPCD - mpcd_data.bcast(root, hoomd_world); + mpcd_data.bcast(root, hoomd_world, mpi_config); #endif } #endif diff --git a/hoomd/mpcd/ParticleDataSnapshot.cc b/hoomd/mpcd/ParticleDataSnapshot.cc index 5e208e8610..47c7fb7f8f 100644 --- a/hoomd/mpcd/ParticleDataSnapshot.cc +++ b/hoomd/mpcd/ParticleDataSnapshot.cc @@ -70,13 +70,30 @@ bool mpcd::ParticleDataSnapshot::validate() const * \param root Root rank to broadcast from * \param mpi_comm MPI communicator to use for broadcasting */ -void mpcd::ParticleDataSnapshot::bcast(unsigned int root, MPI_Comm mpi_comm) +void mpcd::ParticleDataSnapshot::bcast(unsigned int root, + MPI_Comm mpi_comm, + std::shared_ptr mpi_config) { - hoomd::bcast(size, root, mpi_comm); - hoomd::bcast(position, root, mpi_comm); - hoomd::bcast(velocity, root, mpi_comm); - hoomd::bcast(type, root, mpi_comm); - hoomd::bcast(mass, root, mpi_comm); + int rank; + MPI_Comm_rank(mpi_comm, &rank); + + // broadcast size and resize + int N; + if (rank == static_cast(root)) + { + N = size; + } + MPI_Bcast(&N, 1, MPI_UNSIGNED, root, mpi_comm); + if (rank != static_cast(root)) + { + resize(N); + } + + const MPI_Datatype mpi_vec3 = mpi_config->getVec3ScalarDatatype(); + MPI_Bcast(position.data(), size, mpi_vec3, root, mpi_comm); + MPI_Bcast(velocity.data(), size, mpi_vec3, root, mpi_comm); + MPI_Bcast(type.data(), size, MPI_UNSIGNED, root, mpi_comm); + MPI_Bcast(&mass, 1, MPI_HOOMD_SCALAR, root, mpi_comm); hoomd::bcast(type_mapping, root, mpi_comm); } #endif diff --git a/hoomd/mpcd/ParticleDataSnapshot.h b/hoomd/mpcd/ParticleDataSnapshot.h index bbed6e84e2..1f9fac5c3b 100644 --- a/hoomd/mpcd/ParticleDataSnapshot.h +++ b/hoomd/mpcd/ParticleDataSnapshot.h @@ -14,6 +14,7 @@ #endif #include "hoomd/BoxDim.h" +#include "hoomd/MPIConfiguration.h" #include "hoomd/VectorMath.h" // pybind11 #include @@ -68,7 +69,7 @@ class PYBIND11_EXPORT ParticleDataSnapshot #ifdef ENABLE_MPI //! Broadcast the snapshot using MPI - void bcast(unsigned int root, MPI_Comm mpi_comm); + void bcast(unsigned int root, MPI_Comm mpi_comm, std::shared_ptr mpi_config); #endif //! Replicate the snapshot data diff --git a/hoomd/mpcd/VirtualParticleFiller.cc b/hoomd/mpcd/VirtualParticleFiller.cc index a5611aaba7..304032d19f 100644 --- a/hoomd/mpcd/VirtualParticleFiller.cc +++ b/hoomd/mpcd/VirtualParticleFiller.cc @@ -26,7 +26,7 @@ mpcd::VirtualParticleFiller::VirtualParticleFiller(std::shared_ptrgetNRanks() > 1) { - bcast(m_filler_id, 0, m_exec_conf->getMPICommunicator()); + MPI_Bcast(&m_filler_id, 1, MPI_UNSIGNED, 0, m_exec_conf->getMPICommunicator()); } #endif // ENABLE_MPI } From f23d6e0d33e222397aed1777b124c4b0268201ce Mon Sep 17 00:00:00 2001 From: Michael Howard Date: Wed, 25 Sep 2024 22:40:43 -0500 Subject: [PATCH 05/10] Don't use thrust --- hoomd/mpcd/ParticleData.cc | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/hoomd/mpcd/ParticleData.cc b/hoomd/mpcd/ParticleData.cc index 109852096d..fa48a6a89d 100644 --- a/hoomd/mpcd/ParticleData.cc +++ b/hoomd/mpcd/ParticleData.cc @@ -14,12 +14,9 @@ #include "hoomd/HOOMDMPI.h" #endif // ENABLE_MPI -#include -#include -#include - #include +#include #include #include using namespace std; @@ -135,9 +132,8 @@ void mpcd::ParticleData::initializeFromSnapshot(const mpcd::ParticleDataSnapshot // assign each particle to a rank unsigned int num_types = 0; - std::vector num_per_rank; // number per rank - std::vector rank_displacements; // displacement of particles per rank - std::vector particle_rank; // rank each particle belongs to + std::vector num_per_rank; // number per rank + std::vector rank_displacements; // displacement of particles per rank std::vector particles; if (rank == root) { @@ -148,7 +144,6 @@ void mpcd::ParticleData::initializeFromSnapshot(const mpcd::ParticleDataSnapshot // global particle number is snapshot size, also allocate temporary data for sending N_global = snapshot.size; - particle_rank.resize(N_global); particles.resize(N_global); // temporary data for counts per rank @@ -224,10 +219,9 @@ void mpcd::ParticleData::initializeFromSnapshot(const mpcd::ParticleDataSnapshot particle.vel = make_scalar4(vel.x, vel.y, vel.z, __int_as_scalar(mpcd::detail::NO_CELL)); particle.tag = snap_idx; - particle.comm_flag = 0; + particle.comm_flag = rank; // hijack the comm flag to sort by rank // fill up per-processor data structures - particle_rank[snap_idx] = rank; particles[snap_idx] = particle; ++num_per_rank[rank]; } @@ -239,16 +233,18 @@ void mpcd::ParticleData::initializeFromSnapshot(const mpcd::ParticleDataSnapshot m_type_mapping = snapshot.type_mapping; num_types = static_cast(m_type_mapping.size()); + // exclusive scan of displacements + rank_displacements[0] = 0; + for (size_t i = 1; i < num_per_rank.size(); ++i) + { + rank_displacements[i] = rank_displacements[i - 1] + num_per_rank[i - 1]; + } + // sort particles by rank - thrust::exclusive_scan(thrust::host, - num_per_rank.begin(), - num_per_rank.end(), - rank_displacements.begin(), - 0); - thrust::sort_by_key(thrust::host, - particle_rank.begin(), - particle_rank.end(), - particles.begin()); + std::sort(particles.begin(), + particles.end(), + [](const mpcd::detail::pdata_element& a, const mpcd::detail::pdata_element& b) + { return a.comm_flag < b.comm_flag; }); } // broadcast global number of particles From b1164109079354750d2e792fd02fb8950ef8df02 Mon Sep 17 00:00:00 2001 From: Michael Howard Date: Wed, 20 Nov 2024 11:55:40 -0600 Subject: [PATCH 06/10] Use new data structure to gather snapshots --- hoomd/mpcd/ParticleData.cc | 145 ++++++++++++++++++++++--------------- 1 file changed, 87 insertions(+), 58 deletions(-) diff --git a/hoomd/mpcd/ParticleData.cc b/hoomd/mpcd/ParticleData.cc index fa48a6a89d..efafaecec4 100644 --- a/hoomd/mpcd/ParticleData.cc +++ b/hoomd/mpcd/ParticleData.cc @@ -134,7 +134,7 @@ void mpcd::ParticleData::initializeFromSnapshot(const mpcd::ParticleDataSnapshot unsigned int num_types = 0; std::vector num_per_rank; // number per rank std::vector rank_displacements; // displacement of particles per rank - std::vector particles; + std::vector particles; if (rank == root) { const Index3D& di = m_decomposition->getDomainIndexer(); @@ -460,64 +460,80 @@ void mpcd::ParticleData::takeSnapshot(mpcd::ParticleDataSnapshot& snapshot, #ifdef ENABLE_MPI if (m_decomposition) { - // gather a global snapshot - std::vector pos(m_N); - std::vector vel(m_N); - std::vector type(m_N); - std::vector tag(m_N); + const MPI_Comm mpi_comm = m_exec_conf->getMPICommunicator(); + const unsigned int n_ranks = m_exec_conf->getNRanks(); + const unsigned int rank = m_exec_conf->getRank(); + const unsigned int root = 0; + + /* + * Stage particles to send on each rank. The root rank gets enough memory to hold ALL + * particles later even though we only fill with the local number for now. + */ + std::vector particles((rank != root) ? m_N : m_N_global); for (unsigned int idx = 0; idx < m_N; ++idx) { - pos[idx] = make_scalar3(h_pos.data[idx].x, h_pos.data[idx].y, h_pos.data[idx].z); - vel[idx] = make_scalar3(h_vel.data[idx].x, h_vel.data[idx].y, h_vel.data[idx].z); - type[idx] = __scalar_as_int(h_pos.data[idx].w); - tag[idx] = h_tag.data[idx]; + mpcd::detail::pdata_element particle; + particle.pos = h_pos.data[idx]; + particle.vel = h_vel.data[idx]; + particle.tag = h_tag.data[idx]; + particle.comm_flag = 0; + particles[idx] = particle; } - // Create per-processor arrays to gather the data back to root - std::vector> pos_proc; // Position array of every processor - std::vector> vel_proc; // Velocities array of every processor - std::vector> type_proc; // Particle types array of every processor - std::vector> tag_proc; // Tag array of every processor - - // resize to number of ranks in communicator - const MPI_Comm mpi_comm = m_exec_conf->getMPICommunicator(); - const unsigned int n_ranks = m_exec_conf->getNRanks(); - const unsigned int rank = m_exec_conf->getRank(); - pos_proc.resize(n_ranks); - vel_proc.resize(n_ranks); - type_proc.resize(n_ranks); - tag_proc.resize(n_ranks); + // size particles per rank and displacements + std::vector num_per_rank(n_ranks); // number per rank + std::vector rank_displacements(n_ranks); // displacement of particles per rank + MPI_Gather(&m_N, 1, MPI_UNSIGNED, num_per_rank.data(), 1, MPI_UNSIGNED, root, mpi_comm); + rank_displacements[0] = 0; + for (size_t i = 1; i < num_per_rank.size(); ++i) + { + rank_displacements[i] = rank_displacements[i - 1] + num_per_rank[i - 1]; + } - // collect all particle data on the root processor - const unsigned int root = 0; - gather_v(pos, pos_proc, root, mpi_comm); - gather_v(vel, vel_proc, root, mpi_comm); - gather_v(type, type_proc, root, mpi_comm); - gather_v(tag, tag_proc, root, mpi_comm); + // gather data back to root rank + MPI_Gatherv((rank != root) ? particles.data() : MPI_IN_PLACE, + m_N, + m_mpi_pdata_element, + particles.data(), + num_per_rank.data(), + rank_displacements.data(), + m_mpi_pdata_element, + root, + mpi_comm); if (rank == root) { + /* + * First sort the particles into tag order. This is probably a slow step, but could be + * sped up by sorting on each rank first then merging the sorted segments here. + */ + std::sort(particles.begin(), + particles.end(), + [](const mpcd::detail::pdata_element& a, const mpcd::detail::pdata_element& b) + { return a.tag < b.tag; }); + // allocate memory in snapshot - snapshot.resize(getNGlobal()); + snapshot.resize(m_N_global); - // write back into the snapshot in tag order, don't really care about cache coherency - for (unsigned int rank_idx = 0; rank_idx < n_ranks; ++rank_idx) + // unpack into snapshot + for (unsigned int idx = 0; idx < m_N_global; ++idx) { - const unsigned int N = (unsigned int)pos_proc[rank_idx].size(); - for (unsigned int idx = 0; idx < N; ++idx) - { - const unsigned int snap_idx = tag_proc[rank_idx][idx]; + const auto particle = particles[idx]; - // make sure the position stored in the snapshot is within the boundaries - Scalar3 pos_i = pos_proc[rank_idx][idx]; - int3 img = make_int3(0, 0, 0); - global_box->wrap(pos_i, img); + // wrapped position + const Scalar4 postype = particle.pos; + Scalar3 pos = make_scalar3(postype.x, postype.y, postype.z); + int3 img = make_int3(0, 0, 0); + global_box->wrap(pos, img); + snapshot.position[idx] = vec3(pos); - // push particle into the snapshot - snapshot.position[snap_idx] = vec3(pos_i); - snapshot.velocity[snap_idx] = vec3(vel_proc[rank_idx][idx]); - snapshot.type[snap_idx] = type_proc[rank_idx][idx]; - } + // typeid + snapshot.type[idx] = __scalar_as_int(postype.w); + + // velocity + const Scalar4 velcell = particle.vel; + const Scalar3 vel = make_scalar3(velcell.x, velcell.y, velcell.z); + snapshot.velocity[idx] = vec3(vel); } } } @@ -525,24 +541,37 @@ void mpcd::ParticleData::takeSnapshot(mpcd::ParticleDataSnapshot& snapshot, #endif { // allocate memory in snapshot - snapshot.resize(getNGlobal()); + snapshot.resize(m_N); + + // sort by tags + std::vector> tag_index(m_N); + for (unsigned int idx = 0; idx < m_N; ++idx) + { + tag_index[idx] = std::make_pair(h_tag.data[idx], idx); + } + std::sort(tag_index.begin(), + tag_index.end(), + [](const auto& a, const auto& b) { return a.first < b.first; }); - // iterate through particles + // unpack into snapshot for (unsigned int idx = 0; idx < m_N; ++idx) { - const unsigned int snap_idx = h_tag.data[idx]; + const unsigned int pidx = tag_index[idx].second; - // make sure the position stored in the snapshot is within the boundaries - Scalar4 postype = h_pos.data[idx]; - Scalar3 pos_i = make_scalar3(postype.x, postype.y, postype.z); - const unsigned int type_i = __scalar_as_int(postype.w); + // wrapped position + const Scalar4 postype = h_pos.data[pidx]; + Scalar3 pos = make_scalar3(postype.x, postype.y, postype.z); int3 img = make_int3(0, 0, 0); - global_box->wrap(pos_i, img); + global_box->wrap(pos, img); + snapshot.position[idx] = vec3(pos); + + // typeid + snapshot.type[idx] = __scalar_as_int(postype.w); - // push particle into the snapshot - snapshot.position[snap_idx] = vec3(pos_i); - snapshot.velocity[snap_idx] = vec3(h_vel.data[idx]); - snapshot.type[snap_idx] = type_i; + // velocity + const Scalar4 velcell = h_vel.data[pidx]; + const Scalar3 vel = make_scalar3(velcell.x, velcell.y, velcell.z); + snapshot.velocity[idx] = vec3(vel); } } From 379afa9feec3923bf3ccb238ff756b18e85ff884 Mon Sep 17 00:00:00 2001 From: Michael Howard Date: Wed, 20 Nov 2024 12:32:26 -0600 Subject: [PATCH 07/10] Free created types in destructors --- hoomd/MPIConfiguration.cc | 8 ++++++++ hoomd/MPIConfiguration.h | 2 +- hoomd/mpcd/ParticleData.cc | 3 +++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/hoomd/MPIConfiguration.cc b/hoomd/MPIConfiguration.cc index 424777e589..eaffbb2f4d 100644 --- a/hoomd/MPIConfiguration.cc +++ b/hoomd/MPIConfiguration.cc @@ -94,6 +94,14 @@ MPIConfiguration::MPIConfiguration( #endif } +MPIConfiguration::~MPIConfiguration() + { + MPI_Type_free(&m_mpi_scalar2); + MPI_Type_free(&m_mpi_scalar3); + MPI_Type_free(&m_mpi_vec3_scalar); + MPI_Type_free(&m_mpi_scalar4); + } + void MPIConfiguration::splitPartitions(unsigned int nrank) { #ifdef ENABLE_MPI diff --git a/hoomd/MPIConfiguration.h b/hoomd/MPIConfiguration.h index e1a6965926..896f9f63b6 100644 --- a/hoomd/MPIConfiguration.h +++ b/hoomd/MPIConfiguration.h @@ -45,7 +45,7 @@ class PYBIND11_EXPORT MPIConfiguration ); //! Destructor - virtual ~MPIConfiguration() { }; + virtual ~MPIConfiguration(); #ifdef ENABLE_MPI MPI_Comm operator()() const diff --git a/hoomd/mpcd/ParticleData.cc b/hoomd/mpcd/ParticleData.cc index efafaecec4..3fdede6212 100644 --- a/hoomd/mpcd/ParticleData.cc +++ b/hoomd/mpcd/ParticleData.cc @@ -91,6 +91,9 @@ mpcd::ParticleData::ParticleData(const mpcd::ParticleDataSnapshot& snapshot, mpcd::ParticleData::~ParticleData() { m_exec_conf->msg->notice(5) << "Destroying MPCD ParticleData" << endl; +#ifdef ENABLE_MPI + MPI_Type_free(&m_mpi_pdata_element); +#endif // ENABLE_MPI } /*! From 558e9983b06c55a12156509b07e37bdcaf91e7b8 Mon Sep 17 00:00:00 2001 From: Michael Howard Date: Wed, 20 Nov 2024 12:49:08 -0600 Subject: [PATCH 08/10] Use derived types in base communicator --- hoomd/Communicator.cc | 81 +++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 41 deletions(-) diff --git a/hoomd/Communicator.cc b/hoomd/Communicator.cc index ec56d4e520..56c49ba974 100644 --- a/hoomd/Communicator.cc +++ b/hoomd/Communicator.cc @@ -1297,47 +1297,46 @@ Communicator::Communicator(std::shared_ptr sysdef, initializeNeighborArrays(); - /* create a type for pdata_element */ - const int nitems = 14; - int blocklengths[14] = {4, 4, 3, 1, 1, 3, 1, 4, 4, 3, 1, 4, 4, 6}; - MPI_Datatype types[14] = {MPI_HOOMD_SCALAR, - MPI_HOOMD_SCALAR, - MPI_HOOMD_SCALAR, - MPI_HOOMD_SCALAR, - MPI_HOOMD_SCALAR, - MPI_INT, - MPI_UNSIGNED, - MPI_HOOMD_SCALAR, - MPI_HOOMD_SCALAR, - MPI_HOOMD_SCALAR, - MPI_UNSIGNED, - MPI_HOOMD_SCALAR, - MPI_HOOMD_SCALAR, - MPI_HOOMD_SCALAR}; - MPI_Aint offsets[14]; - - offsets[0] = offsetof(detail::pdata_element, pos); - offsets[1] = offsetof(detail::pdata_element, vel); - offsets[2] = offsetof(detail::pdata_element, accel); - offsets[3] = offsetof(detail::pdata_element, charge); - offsets[4] = offsetof(detail::pdata_element, diameter); - offsets[5] = offsetof(detail::pdata_element, image); - offsets[6] = offsetof(detail::pdata_element, body); - offsets[7] = offsetof(detail::pdata_element, orientation); - offsets[8] = offsetof(detail::pdata_element, angmom); - offsets[9] = offsetof(detail::pdata_element, inertia); - offsets[10] = offsetof(detail::pdata_element, tag); - offsets[11] = offsetof(detail::pdata_element, net_force); - offsets[12] = offsetof(detail::pdata_element, net_torque); - offsets[13] = offsetof(detail::pdata_element, net_virial); - - MPI_Datatype tmp; - MPI_Type_create_struct(nitems, blocklengths, offsets, types, &tmp); - MPI_Type_commit(&tmp); - - MPI_Type_create_resized(tmp, 0, sizeof(detail::pdata_element), &m_mpi_pdata_element); - MPI_Type_commit(&m_mpi_pdata_element); - MPI_Type_free(&tmp); + // create a type for pdata_element + { + const MPI_Datatype mpi_scalar3 = m_exec_conf->getMPIConfig()->getScalar3Datatype(); + const MPI_Datatype mpi_scalar4 = m_exec_conf->getMPIConfig()->getScalar4Datatype(); + const unsigned int nitems = 14; + int blocklengths[nitems] = {1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 6}; + MPI_Datatype types[nitems] = {mpi_scalar4, + mpi_scalar4, + mpi_scalar3, + MPI_HOOMD_SCALAR, + MPI_HOOMD_SCALAR, + MPI_INT, + MPI_UNSIGNED, + mpi_scalar4, + mpi_scalar4, + mpi_scalar3, + MPI_UNSIGNED, + mpi_scalar4, + mpi_scalar4, + MPI_HOOMD_SCALAR}; + MPI_Aint offsets[nitems] = {offsetof(detail::pdata_element, pos), + offsetof(detail::pdata_element, vel), + offsetof(detail::pdata_element, accel), + offsetof(detail::pdata_element, charge), + offsetof(detail::pdata_element, diameter), + offsetof(detail::pdata_element, image), + offsetof(detail::pdata_element, body), + offsetof(detail::pdata_element, orientation), + offsetof(detail::pdata_element, angmom), + offsetof(detail::pdata_element, inertia), + offsetof(detail::pdata_element, tag), + offsetof(detail::pdata_element, net_force), + offsetof(detail::pdata_element, net_torque), + offsetof(detail::pdata_element, net_virial)}; + + MPI_Datatype tmp; + MPI_Type_create_struct(nitems, blocklengths, offsets, types, &tmp); + MPI_Type_create_resized(tmp, 0, sizeof(detail::pdata_element), &m_mpi_pdata_element); + MPI_Type_commit(&m_mpi_pdata_element); + } } //! Destructor From 604ba15b0e5ef1d9dec2306bd8649983ea62d4a3 Mon Sep 17 00:00:00 2001 From: Michael Howard Date: Wed, 20 Nov 2024 12:55:44 -0600 Subject: [PATCH 09/10] Fix compile error in non-MPI builds --- hoomd/MPIConfiguration.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hoomd/MPIConfiguration.cc b/hoomd/MPIConfiguration.cc index eaffbb2f4d..eb0093d4b0 100644 --- a/hoomd/MPIConfiguration.cc +++ b/hoomd/MPIConfiguration.cc @@ -96,10 +96,12 @@ MPIConfiguration::MPIConfiguration( MPIConfiguration::~MPIConfiguration() { +#ifdef ENABLE_MPI MPI_Type_free(&m_mpi_scalar2); MPI_Type_free(&m_mpi_scalar3); MPI_Type_free(&m_mpi_vec3_scalar); MPI_Type_free(&m_mpi_scalar4); +#endif // ENABLE_MPI } void MPIConfiguration::splitPartitions(unsigned int nrank) From a389953de3c9e29b2819e4e0e19adca27d5a3b55 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Wed, 27 Nov 2024 07:36:36 -0500 Subject: [PATCH 10/10] Mention MPCD initialization fix. --- CHANGELOG.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3cc1a6fbe3..ade8eb7594 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -29,6 +29,9 @@ Change Log * ``hoomd.hpmc.pair.Pair.energy`` now computes the correct energy when there are multiple pair potentials with different ``r_cut`` values (`#1955 `__). +* Initializing large numbers (~100+ million) of MPCD particles with domain decomposition no longer + causes a segmentation fault + (`#1897 `__). *Added*